def get_all_sound_ids_from_solr(limit=None):
    logger.info("getting all sound ids from solr.")
    if not limit:
        limit = 99999999999999
    solr = Solr(settings.SOLR_URL)
    solr_ids = []
    solr_count = None
    PAGE_SIZE = 2000
    current_page = 1
    # Page through the whole index, sorted by creation date, until all documents
    # have been fetched or the requested limit is reached.
    while (solr_count is None or len(solr_ids) < solr_count) and len(solr_ids) < limit:
        response = SolrResponseInterpreter(
            solr.select(unicode(search_prepare_query(
                '', '', search_prepare_sort('created asc', SEARCH_SORT_OPTIONS_WEB),
                current_page, PAGE_SIZE, include_facets=False))))
        solr_ids += [element['id'] for element in response.docs]
        solr_count = response.num_found
        current_page += 1
    return sorted(solr_ids)
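# For context, a minimal sketch of how get_all_sound_ids_from_solr() might be
# used to cross-check the index against the database. The helper below is
# hypothetical; it only assumes the sounds.models.Sound model that is already
# referenced elsewhere in this module.
def find_sound_ids_missing_from_solr():
    solr_ids = set(get_all_sound_ids_from_solr())
    db_ids = set(sounds.models.Sound.objects.values_list('id', flat=True))
    # Ids present in the database but absent from the index are candidates
    # for re-indexing.
    return sorted(db_ids - solr_ids)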
def check_if_sound_exists_in_solr(sound):
    solr = Solr(settings.SOLR_URL)
    response = SolrResponseInterpreter(
        solr.select(unicode(search_prepare_query(
            '', 'id:%i' % sound.id,
            search_prepare_sort('created asc', SEARCH_SORT_OPTIONS_WEB), 1, 1))))
    return response.num_found > 0
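# A hypothetical usage sketch for check_if_sound_exists_in_solr(): walk a
# queryset and queue anything the index does not know about.
# send_to_indexing_queue() is illustrative and not part of this module.
def queue_unindexed_sounds(sound_queryset):
    for sound in sound_queryset:
        if not check_if_sound_exists_in_solr(sound):
            send_to_indexing_queue(sound)  # hypothetical helper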
def get_solr_results(search_form, page_size, max_pages, start_page=1, valid_ids=None, solr=None, offset=None):
    if not solr:
        solr = Solr(settings.SOLR_URL)

    query_filter = search_form.cleaned_data['filter']
    if valid_ids:
        # Update solr filter to only return results in valid ids
        ids_filter = 'id:(' + ' OR '.join([str(item) for item in valid_ids]) + ')'
        if query_filter:
            query_filter += ' %s' % ids_filter
        else:
            query_filter = ids_filter

    solr_ids = []
    solr_count = None
    try:
        current_page = start_page
        n_page_requests = 1
        # Iterate over solr result pages
        while (solr_count is None or len(solr_ids) < solr_count) and n_page_requests <= max_pages:
            query = search_prepare_query(unquote(search_form.cleaned_data['query'] or ""),
                                         unquote(query_filter or ""),
                                         search_form.cleaned_data['sort'],
                                         current_page,
                                         page_size,
                                         grouping=False,
                                         include_facets=False,
                                         offset=offset)
            result = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_ids += [element['id'] for element in result.docs]
            solr_count = result.num_found
            current_page += 1
            n_page_requests += 1
    except SolrException as e:
        raise ServerErrorException(msg='Search server error: %s' % e.message)
    except Exception:
        raise ServerErrorException(msg='The search server could not be reached or some unexpected error occurred.')

    return solr_ids, solr_count
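# To make the valid_ids handling in get_solr_results() concrete, a small
# self-check showing the Solr boolean clause it builds (the values are just
# examples):
def _example_ids_filter():
    valid_ids = [1, 17, 42]
    ids_filter = 'id:(' + ' OR '.join([str(item) for item in valid_ids]) + ')'
    assert ids_filter == 'id:(1 OR 17 OR 42)'
    # Appended to an existing filter it becomes e.g. 'tag:piano id:(1 OR 17 OR 42)'
    return ids_filter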
def api_search(search_form, target_file=None, extra_parameters=False, merging_strategy='merge_optimized',
               resource=None):
    if search_form.cleaned_data['query'] is None \
            and search_form.cleaned_data['filter'] is None \
            and not search_form.cleaned_data['descriptors_filter'] \
            and not search_form.cleaned_data['target'] \
            and not target_file:
        # No input data for search, return empty results
        return [], 0, None, None, None, None, None

    if search_form.cleaned_data['query'] is None and search_form.cleaned_data['filter'] is None:
        # Standard content-based search
        try:
            results, count, note = similarity_api_search(
                target=search_form.cleaned_data['target'],
                filter=search_form.cleaned_data['descriptors_filter'],
                num_results=search_form.cleaned_data['page_size'],
                offset=(search_form.cleaned_data['page'] - 1) * search_form.cleaned_data['page_size'],
                target_file=target_file)

            gaia_ids = [result[0] for result in results]
            distance_to_target_data = None
            if search_form.cleaned_data['target'] or target_file:
                # Save sound distance to target into view class so it can be accessed by the serializer
                # We only do that when a target is specified (otherwise there is no meaningful distance value)
                distance_to_target_data = dict(results)

            gaia_count = count
            return gaia_ids, gaia_count, distance_to_target_data, None, note, None, None

        except SimilarityException as e:
            if e.status_code == 500:
                raise ServerErrorException(msg=e.message, resource=resource)
            elif e.status_code == 400:
                raise BadRequestException(msg=e.message, resource=resource)
            elif e.status_code == 404:
                raise NotFoundException(msg=e.message, resource=resource)
            else:
                raise ServerErrorException(msg='Similarity server error: %s' % e.message, resource=resource)
        except Exception:
            raise ServerErrorException(
                msg='The similarity server could not be reached or some unexpected error occurred.',
                resource=resource)

    elif not search_form.cleaned_data['descriptors_filter'] \
            and not search_form.cleaned_data['target'] \
            and not target_file:
        # Standard text-based search
        try:
            solr = Solr(settings.SOLR_URL)
            query = search_prepare_query(unquote(search_form.cleaned_data['query'] or ""),
                                         unquote(search_form.cleaned_data['filter'] or ""),
                                         search_form.cleaned_data['sort'],
                                         search_form.cleaned_data['page'],
                                         search_form.cleaned_data['page_size'],
                                         grouping=search_form.cleaned_data['group_by_pack'],
                                         include_facets=False)
            result = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_ids = [element['id'] for element in result.docs]
            solr_count = result.num_found

            more_from_pack_data = None
            if search_form.cleaned_data['group_by_pack']:
                # If grouping option is on, store grouping info in a dictionary that we can add when serializing sounds
                more_from_pack_data = dict([
                    (int(element['id']),
                     [element['more_from_pack'], element['pack_id'], element['pack_name']])
                    for element in result.docs])

            return solr_ids, solr_count, None, more_from_pack_data, None, None, None

        except SolrException as e:
            if search_form.cleaned_data['filter'] is not None:
                raise BadRequestException(msg='Search server error: %s (please check that your filter syntax and '
                                              'field names are correct)' % e.message, resource=resource)
            raise BadRequestException(msg='Search server error: %s' % e.message, resource=resource)
        except Exception:
            raise ServerErrorException(
                msg='The search server could not be reached or some unexpected error occurred.',
                resource=resource)

    else:
        # Combined search (there is at least one of query/filter and one of descriptors_filter/target)
        # Strategies are implemented in 'combined_search_strategies'
        strategy = getattr(combined_search_strategies, merging_strategy)
        return strategy(search_form, target_file=target_file, extra_parameters=extra_parameters)
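# Every branch of api_search() returns the same 7-tuple, so callers can unpack
# it uniformly. A hypothetical convenience wrapper for illustration; note that
# in the two branches above the last two slots are always None (they appear to
# be populated only by the combined-search strategies).
def api_search_ids_and_count(search_form):
    ids, count, _distance_to_target, _more_from_pack, _note, _, _ = api_search(search_form)
    return ids, count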
def get_stream_sounds(user, time_lapse):
    solr = Solr(settings.SOLR_URL)
    sort_str = search_prepare_sort("created desc", SEARCH_SORT_OPTIONS_WEB)

    #
    # USERS FOLLOWING
    #
    users_following = get_users_following(user)
    users_sounds = []
    for user_following in users_following:
        filter_str = "username:"******" created:" + time_lapse
        query = search_prepare_query("", filter_str, sort_str, 1, SOLR_QUERY_LIMIT_PARAM,
                                     grouping=False, include_facets=False)
        result = SolrResponseInterpreter(solr.select(unicode(query)))
        if result.num_rows != 0:
            more_count = max(0, result.num_found - SOLR_QUERY_LIMIT_PARAM)
            # the sorting only works if done like this!
            more_url_params = [urllib.quote(filter_str), urllib.quote(sort_str[0])]
            # this is the same link, but for the email it has to be "quoted"
            more_url = u"?f=" + filter_str + u"&s=" + sort_str[0]
            sound_ids = [element['id'] for element in result.docs]
            sound_objs = sounds.models.Sound.objects.filter(id__in=sound_ids).select_related('license', 'user')
            new_count = more_count + len(sound_ids)
            users_sounds.append(((user_following, False), sound_objs, more_url_params, more_count, new_count))

    #
    # TAGS FOLLOWING
    #
    tags_following = get_tags_following(user)
    tags_sounds = []
    for tag_following in tags_following:
        tags = tag_following.split(" ")
        tag_filter_query = ""
        for tag in tags:
            tag_filter_query += "tag:" + tag + " "
        tag_filter_str = tag_filter_query + " created:" + time_lapse
        query = search_prepare_query("", tag_filter_str, sort_str, 1, SOLR_QUERY_LIMIT_PARAM,
                                     grouping=False, include_facets=False)
        result = SolrResponseInterpreter(solr.select(unicode(query)))
        if result.num_rows != 0:
            more_count = max(0, result.num_found - SOLR_QUERY_LIMIT_PARAM)
            # the sorting only works if done like this!
            more_url_params = [urllib.quote(tag_filter_str), urllib.quote(sort_str[0])]
            # this is the same link, but for the email it has to be "quoted"
            more_url = u"?f=" + tag_filter_str + u"&s=" + sort_str[0]
            sound_ids = [element['id'] for element in result.docs]
            sound_objs = sounds.models.Sound.objects.filter(id__in=sound_ids)
            new_count = more_count + len(sound_ids)
            tags_sounds.append((tags, sound_objs, more_url_params, more_count, new_count))

    return users_sounds, tags_sounds
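# A hedged illustration of the tag filter string get_stream_sounds() builds:
# each followed tag string holds space-separated tags and every tag becomes
# its own tag: clause. The time_lapse value below is an assumption (it looks
# like a Solr date-range expression).
def _example_tag_filter():
    time_lapse = "[NOW-7DAY TO NOW]"  # assumed format
    tag_following = "field-recording rain"
    tag_filter_query = ""
    for tag in tag_following.split(" "):
        tag_filter_query += "tag:" + tag + " "
    tag_filter_str = tag_filter_query + " created:" + time_lapse
    # Note the double space left by the trailing space in the loop above.
    assert tag_filter_str == "tag:field-recording tag:rain  created:[NOW-7DAY TO NOW]"
    return tag_filter_str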
def read(self, request):
    ip = get_client_ip(request)
    form = SoundSearchForm(SEARCH_SORT_OPTIONS_API, request.GET)
    if not form.is_valid():
        resp = rc.BAD_REQUEST
        resp.content = form.errors
        return resp

    cd = form.cleaned_data
    grouping = request.GET.get("g", "")
    if grouping == "0":
        grouping = ""

    solr = Solr(settings.SOLR_URL)
    sounds_per_page = min(int(request.GET.get('sounds_per_page', settings.SOUNDS_PER_API_RESPONSE)),
                          settings.MAX_SOUNDS_PER_API_RESPONSE)
    query = search_prepare_query(cd['q'], cd['f'],
                                 search_prepare_sort(cd['s'], SEARCH_SORT_OPTIONS_API),
                                 cd['p'], sounds_per_page, grouping=grouping)
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, sounds_per_page)
        page = paginator.page(form.cleaned_data['p'])
        sounds = []
        bad_results = 0
        for object in page['object_list']:
            try:
                sound = prepare_collection_sound(Sound.objects.select_related('user').get(id=object['id']),
                                                 custom_fields=request.GET.get('fields', False))
                if 'more_from_pack' in object.keys():
                    if object['more_from_pack'] > 0:
                        link = prepend_base(reverse('api-search') + '?q=%s&f=pack:"%s" %s&s=%s&g=%s' %
                                            (my_quote(cd['q']), object['pack_name'], my_quote(cd['f']), cd['s'], ""))
                        if request.GET.get('sounds_per_page', None):
                            link += "&sounds_per_page=" + str(request.GET.get('sounds_per_page', None))
                        if request.GET.get('fields', False):
                            link += "&fields=" + str(request.GET.get('fields', False))
                        sound['results_from_the_same_pack'] = link
                        sound['n_results_from_the_same_pack'] = object['more_from_pack']
                sounds.append(sound)
            except:
                # This will happen if there are synchronization errors between the solr index and the
                # database. In that case sounds are omitted, and both num_results and results per page
                # might become inaccurate.
                pass

        result = {'sounds': sounds,
                  'num_results': paginator.count - bad_results,
                  'num_pages': paginator.num_pages}

        # construct previous and next urls
        if page['has_other_pages']:
            if page['has_previous']:
                result['previous'] = self.__construct_pagination_link(cd['q'],
                                                                      page['previous_page_number'],
                                                                      cd['f'],
                                                                      find_api_option(cd['s']),
                                                                      request.GET.get('sounds_per_page', None),
                                                                      request.GET.get('fields', False),
                                                                      grouping)
            if page['has_next']:
                result['next'] = self.__construct_pagination_link(cd['q'],
                                                                  page['next_page_number'],
                                                                  cd['f'],
                                                                  find_api_option(cd['s']),
                                                                  request.GET.get('sounds_per_page', None),
                                                                  request.GET.get('fields', False),
                                                                  grouping)

        add_request_id(request, result)
        logger.info("Searching,q=" + cd['q'] + ",f=" + cd['f'] + ",p=" + str(cd['p']) +
                    ",sounds_per_page=" + str(sounds_per_page) +
                    ",api_key=" + request.GET.get("api_key", "") +
                    ",api_key_username="******",ip=" + ip)
        return result
    except SolrException as e:
        error = "search_query %s filter_query %s sort %s error %s" % (cd['q'], cd['f'], cd['s'], e)
        raise ReturnError(500, "SearchError", {"explanation": error})
msg="The similarity server could not be reached or some unexpected error occurred." ) elif ( not search_form.cleaned_data["descriptors_filter"] and not search_form.cleaned_data["target"] and not target_file ): # Standard text-based search try: solr = Solr(settings.SOLR_URL) query = search_prepare_query( unquote(search_form.cleaned_data["query"]), unquote(search_form.cleaned_data["filter"]), search_form.cleaned_data["sort"], search_form.cleaned_data["page"], search_form.cleaned_data["page_size"], grouping=search_form.cleaned_data["group_by_pack"], include_facets=False, ) result = SolrResponseInterpreter(solr.select(unicode(query))) solr_ids = [element["id"] for element in result.docs] solr_count = result.num_found more_from_pack_data = None if search_form.cleaned_data["group_by_pack"]: # If grouping option is on, store grouping info in a dictionary that we can add when serializing sounds more_from_pack_data = dict( [ (int(element["id"]), [element["more_from_pack"], element["pack_id"], element["pack_name"]])