def handle(self, *args, **options): LIMIT = None SLICE_SIZE = 500 solr_sound_ids = [] solr = Solr(url=settings.SOLR_URL) query = SolrQuery() query.set_dismax_query("") # Query to get ALL sounds print "Retrieving ids from %i to %i" % (0, SLICE_SIZE) query.set_query_options(field_list=["id"], rows=SLICE_SIZE, start=0) results = SolrResponseInterpreter(solr.select(unicode(query))) solr_sound_ids += list_of_dicts_to_list_of_ids(results.docs) total_num_documents = results.num_found # Start iterating over other pages (slices) if LIMIT: number_of_documents = min(LIMIT, total_num_documents) else: number_of_documents = total_num_documents for i in range(SLICE_SIZE, number_of_documents, SLICE_SIZE): print "Retrieving ids from %i to %i" % (i, i + SLICE_SIZE) query.set_query_options(field_list=["id"], rows=SLICE_SIZE, start=i) results = SolrResponseInterpreter(solr.select(unicode(query))) solr_sound_ids += list_of_dicts_to_list_of_ids(results.docs) solr_sound_ids = sorted(list(set(solr_sound_ids))) if LIMIT: solr_sound_ids = solr_sound_ids[0:LIMIT] print "%i document ids retrieved" % len(solr_sound_ids) n_deleted = 0 print "" for count, id in enumerate(solr_sound_ids): sys.stdout.write("\rChecking doc %i of %i" % (count, len(solr_sound_ids))) sys.stdout.flush() if Sound.objects.filter(id=id, moderation_state="OK", processing_state="OK").exists(): pass else: # Sound does not exist in the Db or is not properly moderated and processed print "\n\t - Deleting sound with id %i from solr index" % id solr.delete_by_id(id) n_deleted += 1 print "\n\nDONE! %i sounds deleted from solr index (it may take some minutes to actually see the changes in the page)" % n_deleted
def get_all_sound_ids_from_solr(limit=False):
    """Return a sorted list of all sound ids present in the Solr index.

    :param limit: optional maximum number of ids to retrieve; any falsy value
        means "no limit".
    """
    logger.info("getting all sound ids from solr.")
    if not limit:
        limit = 99999999999999  # effectively "no limit"
    solr = Solr(settings.SOLR_URL)
    solr_ids = []
    solr_count = None
    PAGE_SIZE = 2000
    current_page = 1
    # Page through the index until every document has been collected (or the
    # limit is hit). FIX: the None check is now evaluated first; the original
    # `len(solr_ids) < solr_count or solr_count is None` relied on Python 2's
    # arbitrary int/None ordering for the first iteration.
    while (solr_count is None or len(solr_ids) < solr_count) and len(solr_ids) < limit:
        response = SolrResponseInterpreter(
            solr.select(unicode(search_prepare_query(
                '', '',
                search_prepare_sort('created asc', SEARCH_SORT_OPTIONS_WEB),
                current_page, PAGE_SIZE, include_facets=False))))
        solr_ids += [element['id'] for element in response.docs]
        solr_count = response.num_found
        current_page += 1
    return sorted(solr_ids)
def setUp(self):
    # Build fake solr response data used to mock the perform_solr_query function.
    self.NUM_RESULTS = 15
    sound_ids = list(
        Sound.objects.filter(moderation_state="OK", processing_state="OK")
                     .values_list('id', flat=True)[:self.NUM_RESULTS])
    solr_select_returned_data['grouped']['grouping_pack']['groups'] = [
        {'doclist': {'docs': [{'id': sid}], 'numFound': 1, 'start': 0},
         'groupValue': str(sid)}
        for sid in sound_ids]
    # Deepcopy the response dict: SolrResponseInterpreter mutates its input,
    # which would break a second run. Ideally SolrResponseInterpreter should be
    # fixed so that it does not change its input parameter.
    results = SolrResponseInterpreter(copy.deepcopy(solr_select_returned_data))
    paginator = SolrResponseInterpreterPaginator(results, self.NUM_RESULTS)
    first_page = paginator.page(1)  # Get first page
    self.perform_solr_query_response = (
        results.non_grouped_number_of_matches,
        results.facets,
        paginator,
        first_page,
        results.docs)
def get_user_tags(self, use_solr=True):
    """Return this user's 10 most used tags as [{'name': .., 'count': ..}, ...].

    Facets come from solr when use_solr is True, otherwise from a direct SQL
    query. Returns False if the solr query fails.
    """
    if not use_solr:
        return DelayedQueryExecuter(""" SELECT tags_tag.name AS name, X.c AS count FROM ( SELECT tag_id, count(*) as c FROM tags_taggeditem LEFT JOIN sounds_sound ON object_id=sounds_sound.id WHERE tags_taggeditem.user_id=%d AND sounds_sound.moderation_state='OK' AND sounds_sound.processing_state='OK' GROUP BY tag_id ORDER BY c DESC LIMIT 10) AS X LEFT JOIN tags_tag ON tags_tag.id=X.tag_id ORDER BY tags_tag.name;""" % self.user_id)

    query = SolrQuery()
    query.set_dismax_query('')
    query.set_query_options(field_list=["id"],
                            filter_query='username:\"%s\"' % self.user.username)
    query.add_facet_fields("tag")
    query.set_facet_options("tag", limit=10, mincount=1)
    try:
        interpreted = SolrResponseInterpreter(
            Solr(settings.SOLR_URL).select(unicode(query)))
    except SolrException:
        return False
    except Exception:
        return False
    return [{'name': tag, 'count': count}
            for tag, count in interpreted.facets['tag']]
def handle(self, *args, **options):
    """Management command: remove stale post documents from the forum Solr index.

    Pages through every document id currently in the forum index (in slices of
    SLICE_SIZE) and deletes any document whose post no longer exists in the DB
    or is not moderated OK.
    """
    LIMIT = None  # set to an int to restrict how many ids are checked (debugging)
    SLICE_SIZE = 500
    solr_post_ids = []

    solr = Solr(url=settings.SOLR_FORUM_URL)
    query = SolrQuery()
    query.set_dismax_query("")  # empty dismax query matches ALL forum posts

    # First slice also tells us the total number of documents in the index
    console_logger.info("Retrieving ids from %i to %i" % (0, SLICE_SIZE))
    query.set_query_options(field_list=["id"], rows=SLICE_SIZE, start=0)
    results = SolrResponseInterpreter(solr.select(unicode(query)))
    solr_post_ids += list_of_dicts_to_list_of_ids(results.docs)
    total_num_documents = results.num_found

    # Iterate over the remaining pages (slices)
    if LIMIT:
        number_of_documents = min(LIMIT, total_num_documents)
    else:
        number_of_documents = total_num_documents
    for i in range(SLICE_SIZE, number_of_documents, SLICE_SIZE):
        console_logger.info("Retrieving ids from %i to %i" % (i, i + SLICE_SIZE - 1))
        query.set_query_options(field_list=["id"], rows=SLICE_SIZE, start=i)
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        solr_post_ids += list_of_dicts_to_list_of_ids(results.docs)

    solr_post_ids = sorted(set(solr_post_ids))
    if LIMIT:
        solr_post_ids = solr_post_ids[0:LIMIT]
    console_logger.info("%i document ids retrieved" % len(solr_post_ids))

    n_deleted = 0
    console_logger.info("")
    for count, post_id in enumerate(solr_post_ids):
        if count % 100 == 0:
            console_logger.info("\rChecking docs %i/%i" % (count, len(solr_post_ids)))
        # Delete when the post is missing from the DB or not properly moderated.
        # (Was an empty `pass` branch plus `else`; condition inverted for
        # clarity. Loop variable renamed from `id`, which shadowed the builtin.)
        if not Post.objects.filter(id=post_id, moderation_state="OK").exists():
            console_logger.info("\n\t - Deleting forum with id %i from solr index" % post_id)
            solr.delete_by_id(post_id)
            n_deleted += 1

    console_logger.info("\n\nDONE! %i forums deleted from solr index (it may take some minutes to actually see "
                        "the changes in the page)" % n_deleted)
def check_if_sound_exists_in_solr(sound):
    """Return True if a document for this sound's id exists in the Solr index."""
    id_query = search_prepare_query(
        '',
        'id:%i' % sound.id,
        search_prepare_sort('created asc', SEARCH_SORT_OPTIONS_WEB),
        1,
        1)
    interpreted = SolrResponseInterpreter(
        Solr(settings.SOLR_URL).select(unicode(id_query)))
    return interpreted.num_found > 0
def get_solr_results(search_form, page_size, max_pages, start_page=1, valid_ids=None, solr=None, offset=None):
    """Run a (possibly multi-page) solr search and return (solr_ids, solr_count).

    :param search_form: validated form providing 'query', 'filter' and 'sort'.
    :param page_size: results per solr request.
    :param max_pages: maximum number of solr requests to issue.
    :param start_page: solr page to start from.
    :param valid_ids: optional iterable of ids to restrict results to.
    :param solr: optional pre-built Solr client (one is created if omitted).
    :param offset: optional raw result offset passed to search_prepare_query.
    :raises ServerErrorException: on any solr/connection error.
    """
    if not solr:
        solr = Solr(settings.SOLR_URL)

    query_filter = search_form.cleaned_data['filter']
    if valid_ids:
        # Update solr filter to only return results in valid ids
        ids_filter = 'id:(' + ' OR '.join([str(item) for item in valid_ids]) + ')'
        if query_filter:
            query_filter += ' %s' % ids_filter
        else:
            query_filter = ids_filter

    solr_ids = []
    solr_count = None
    try:
        current_page = start_page
        n_page_requests = 1
        # Iterate over solr result pages until all matches (or max_pages) are
        # fetched. FIX: `solr_count == None` -> `solr_count is None`, checked
        # first for clarity.
        while (solr_count is None or len(solr_ids) < solr_count) and n_page_requests <= max_pages:
            query = search_prepare_query(
                unquote(search_form.cleaned_data['query'] or ""),
                unquote(query_filter or ""),
                search_form.cleaned_data['sort'],
                current_page,
                page_size,
                grouping=False,
                include_facets=False,
                offset=offset)
            result = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_ids += [element['id'] for element in result.docs]
            solr_count = result.num_found
            current_page += 1
            n_page_requests += 1
    except SolrException as e:
        raise ServerErrorException(msg='Search server error: %s' % e.message)
    except Exception:
        raise ServerErrorException(
            msg='The search server could not be reached or some unexpected error occurred.')
    return solr_ids, solr_count
def perform_solr_query(q, current_page):
    """
    This util function performs the query to SOLR and returns needed parameters to continue with the view.
    The main reason to have this util function is to facilitate mocking in unit tests for this view.
    """
    interpreted = SolrResponseInterpreter(
        Solr(settings.SOLR_URL).select(unicode(q)))
    paginator = SolrResponseInterpreterPaginator(interpreted, settings.SOUNDS_PER_PAGE)
    requested_page = paginator.page(current_page)
    return (interpreted.non_grouped_number_of_matches,
            interpreted.facets,
            paginator,
            requested_page,
            interpreted.docs)
def get_pack_tags(pack_obj):
    """Return the solr facets for tags of the sounds in `pack_obj`, or False on error.

    Facets are restricted to sounds by the pack's owner within the pack.
    """
    query = SolrQuery()
    query.set_dismax_query('')
    filter_query = 'username:\"%s\" pack:\"%s\"' % (pack_obj.user.username, pack_obj.name)
    query.set_query_options(field_list=["id"], filter_query=filter_query)
    query.add_facet_fields("tag")
    query.set_facet_options("tag", limit=20, mincount=1)
    try:
        solr = Solr(settings.SOLR_URL)
        results = SolrResponseInterpreter(solr.select(unicode(query)))
    except Exception:
        # Original caught `(SolrException, Exception)`, which is redundant:
        # Exception already subsumes SolrException.
        # TODO: do something here?
        return False
    return results.facets
def get_user_tags(self, use_solr=True):
    """Fetch this user's top-10 tag facets from solr; returns False on error.

    NOTE(review): no return statement is visible on the success path, so this
    function returns None when the query succeeds — looks truncated; sibling
    implementations return the facet list. Confirm against VCS history.
    """
    if use_solr:
        query = SolrQuery()
        query.set_dismax_query('')
        filter_query = 'username:\"%s\"' % self.user.username
        query.set_query_options(field_list=["id"], filter_query=filter_query)
        query.add_facet_fields("tag")
        query.set_facet_options("tag", limit=10, mincount=1)
        solr = Solr(settings.SOLR_URL)
        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
        except SolrException:
            # FIX: was Py2-only `except SolrException, e`; `e` was unused.
            return False
        except Exception:
            return False
def get_user_tags(self):
    """Return [{'name': tag, 'count': count}, ...] for this user's 10 most
    used tags, or False if the solr query fails."""
    tag_query = SolrQuery()
    tag_query.set_dismax_query('')
    tag_query.set_query_options(
        field_list=["id"],
        filter_query='username:\"%s\"' % self.user.username)
    tag_query.add_facet_fields("tag")
    tag_query.set_facet_options("tag", limit=10, mincount=1)
    try:
        interpreted = SolrResponseInterpreter(
            Solr(settings.SOLR_URL).select(unicode(tag_query)))
    except SolrException:
        return False
    except Exception:
        return False
    return [{'name': tag, 'count': count}
            for tag, count in interpreted.facets['tag']]
def get_pack_tags(pack_obj):
    """Fetch tag facets for the sounds of `pack_obj` from solr.

    NOTE(review): no return is visible on the success path, so this function
    returns None when the query succeeds — looks truncated; the sibling
    implementation returns `results.facets`. Confirm against VCS history.
    """
    query = SolrQuery()
    query.set_dismax_query('')
    filter_query = 'username:\"%s\" pack:\"%s\"' % (pack_obj.user.username, pack_obj.name)
    query.set_query_options(field_list=["id"], filter_query=filter_query)
    query.add_facet_fields("tag")
    query.set_facet_options("tag", limit=20, mincount=1)
    solr = Solr(settings.SOLR_URL)
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
    except SolrException:
        # FIX: was Py2-only `except SolrException, e`; `e` was unused.
        # (Dead commented-out logging/error-text code removed.)
        return False
def get_random_sound_from_solr():
    """ Get a random sound from solr. This is used for random sound browsing. We filter explicit sounds,
    but otherwise don't have any other restrictions on sound attributes
    """
    query = SolrQuery()
    query.set_query("*:*")
    query.set_query_options(start=0,
                            rows=1,
                            field_list=["*"],
                            filter_query='is_explicit:0',
                            sort=['random_%d asc' % random.randint(1, 10000000)])
    try:
        interpreted = SolrResponseInterpreter(
            Solr(settings.SOLR_URL).select(unicode(query)))
        if interpreted.docs:
            return interpreted.docs[0]
    except (SolrException, socket.error):
        pass
    return {}
def tags(request, multiple_tags=None):
    """Tag browsing view: list sounds having every tag in `multiple_tags`
    (a slash-separated path), plus a tag facet cloud.

    NOTE(review): this block ends inside the except handler — the success
    rendering that normally follows appears truncated in this chunk.
    """
    if multiple_tags:
        multiple_tags = multiple_tags.split('/')
    else:
        multiple_tags = []
    multiple_tags = sorted(filter(lambda x: x, multiple_tags))  # drop empty segments

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1

    solr = Solr(settings.SOLR_URL)
    query = SolrQuery()
    if multiple_tags:
        query.set_query(" ".join("tag:\"" + tag + "\"" for tag in multiple_tags))
    else:
        query.set_query("*:*")
    query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                            rows=settings.SOUNDS_PER_PAGE,
                            field_list=["id"],
                            sort=["num_downloads desc"])
    query.add_facet_fields("tag")
    query.set_facet_options_default(limit=100, sort=True, mincount=1, count_missing=False)
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        page = paginator.page(current_page)
        error = False
        tags = [dict(name=f[0], count=f[1]) for f in results.facets["tag"]]
    except SolrException as e:
        # FIX: was Py2-only `except SolrException, e` syntax.
        error = True
        search_logger.error("SOLR ERROR - %s" % e)
raise ServerErrorException(msg='The similarity server could not be reached or some unexpected error occurred.', resource=resource) elif not search_form.cleaned_data['descriptors_filter'] and not search_form.cleaned_data['target'] and not target_file: # Standard text-based search try: solr = Solr(settings.SOLR_URL) query = search_prepare_query(unquote(search_form.cleaned_data['query'] or ""), unquote(search_form.cleaned_data['filter'] or ""), search_form.cleaned_data['sort'], search_form.cleaned_data['page'], search_form.cleaned_data['page_size'], grouping=search_form.cleaned_data['group_by_pack'], include_facets=False) result = SolrResponseInterpreter(solr.select(unicode(query))) solr_ids = [element['id'] for element in result.docs] solr_count = result.num_found more_from_pack_data = None if search_form.cleaned_data['group_by_pack']: # If grouping option is on, store grouping info in a dictionary that we can add when serializing sounds more_from_pack_data = dict([(int(element['id']), [element['more_from_pack'], element['pack_id'], element['pack_name']]) for element in result.docs]) return solr_ids, solr_count, None, more_from_pack_data, None, None, None except SolrException, e: if search_form.cleaned_data['filter'] != None: raise BadRequestException(msg='Search server error: %s (please check that your filter syntax and field names are correct)' % e.message, resource=resource) raise BadRequestException(msg='Search server error: %s' % e.message, resource=resource) except Exception, e:
def tags(request, multiple_tags=None):
    """Tag browsing view with pack grouping: list sounds having every tag in
    `multiple_tags` (slash-separated path) plus a tag facet cloud, and render
    the tags template."""
    if multiple_tags:
        multiple_tags = multiple_tags.split('/')
    else:
        multiple_tags = []
    multiple_tags = sorted(filter(lambda x: x, multiple_tags))  # drop empty segments

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1

    solr = Solr(settings.SOLR_URL)
    query = SolrQuery()
    if multiple_tags:
        query.set_query(" ".join("tag:\"" + tag + "\"" for tag in multiple_tags))
    else:
        query.set_query("*:*")
    query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
                            rows=settings.SOUNDS_PER_PAGE,
                            field_list=["id"],
                            sort=["num_downloads desc"])
    query.add_facet_fields("tag")
    query.set_facet_options_default(limit=100, sort=True, mincount=1, count_missing=False)
    query.set_group_field(group_field="grouping_pack")
    query.set_group_options(
        group_func=None,
        group_query=None,
        group_rows=10,
        group_start=0,
        group_limit=1,
        group_offset=0,
        group_sort=None,
        group_sort_ingroup=None,
        group_format='grouped',
        group_main=False,
        group_num_groups=True,
        group_cache_percent=0,
        group_truncate=True)  # Sets how many results from the same grup are taken into account for computing the facets

    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False
        tags = [dict(name=f[0], count=f[1]) for f in results.facets["tag"]]
        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        # FIX (consistency with the search view): if solr and the DB are out of
        # sync, some solr ids may not be returned by bulk_query_id; drop those
        # docs instead of raising KeyError below.
        docs = [doc for doc in docs if doc["id"] in allsounds]
        for d in docs:
            d["sound"] = allsounds[d["id"]]
    except SolrException as e:
        error = True
        search_logger.error("SOLR ERROR - %s" % e)
    except Exception:
        # FIX: was a bare `except:`, which would also swallow SystemExit and
        # KeyboardInterrupt.
        error = True

    slash_tag = "/".join(multiple_tags)
    space_tag = " ".join(multiple_tags)  # exposed to the template via locals()

    if slash_tag:
        follow_tags_url = reverse('follow-tags', args=[slash_tag])
        unfollow_tags_url = reverse('unfollow-tags', args=[slash_tag])
        show_unfollow_button = False
        if request.user.is_authenticated:
            show_unfollow_button = follow_utils.is_user_following_tag(request.user, slash_tag)

    return render(request, 'sounds/tags.html', locals())
count = int(count) results_before += count # clean the only few things DisMax doesn't like... :) search = search.strip("+-").replace("--", "").replace("+-", "").replace("-+", "").replace("++", "") if search == "\"" or search == "\"\"": search = "" query = SolrQuery() query.set_dismax_query(search, query_fields=[("id", 4), ("tag",3), ("description",3), ("username",2), ("pack_original",2), ("filename",2), "comment"]) query.set_query_options(start=0, rows=10, field_list=["id"]) query.add_facet_fields("samplerate", "pack_original", "username", "tag", "bitrate", "bitdepth") query.set_facet_options_default(limit=5, sort=True, mincount=1, count_missing=True) query.set_facet_options("tag", limit=30) query.set_facet_options("username", limit=30) response = solr.select(unicode(query)) interpreted = SolrResponseInterpreter(response) num_queries_total += 1 num_queries_this_loop += 1 time_solr += interpreted.q_time results_solr += interpreted.num_found except KeyboardInterrupt: break except UnicodeDecodeError: pass
def api_search(search_form, target_file=None, extra_parameters=False, merging_strategy='merge_optimized', resource=None):
    """Dispatch an API search to the right backend.

    Routing (based on which inputs are present):
      * nothing at all            -> empty results
      * only content-based input  -> similarity (gaia) search
      * only text-based input     -> solr search
      * both                      -> combined strategy from `combined_search_strategies`

    Returns a 7-tuple: (ids, count, distance_to_target_data,
    more_from_pack_data, note, params_1, params_2) — unused slots are None.
    Raises BadRequest/NotFound/ServerError exceptions mapped from backend errors.
    """
    if search_form.cleaned_data['query'] is None \
            and search_form.cleaned_data['filter'] is None \
            and not search_form.cleaned_data['descriptors_filter'] \
            and not search_form.cleaned_data['target'] \
            and not target_file:
        # No input data for search, return empty results
        return [], 0, None, None, None, None, None

    if search_form.cleaned_data['query'] is None and search_form.cleaned_data['filter'] is None:
        # Standard content-based search
        try:
            results, count, note = similarity_api_search(
                target=search_form.cleaned_data['target'],
                filter=search_form.cleaned_data['descriptors_filter'],
                num_results=search_form.cleaned_data['page_size'],
                offset=(search_form.cleaned_data['page'] - 1) * search_form.cleaned_data['page_size'],
                target_file=target_file)

            gaia_ids = [result[0] for result in results]
            distance_to_target_data = None
            if search_form.cleaned_data['target'] or target_file:
                # Save sound distance to target into view class so it can be accessed by the serializer.
                # We only do that when a target is specified (otherwise there is no meaningful distance value)
                distance_to_target_data = dict(results)

            gaia_count = count
            return gaia_ids, gaia_count, distance_to_target_data, None, note, None, None
        except SimilarityException as e:
            # Map the similarity server's status codes onto API exceptions
            if e.status_code == 500:
                raise ServerErrorException(msg=e.message, resource=resource)
            elif e.status_code == 400:
                raise BadRequestException(msg=e.message, resource=resource)
            elif e.status_code == 404:
                raise NotFoundException(msg=e.message, resource=resource)
            else:
                raise ServerErrorException(msg='Similarity server error: %s' % e.message,
                                           resource=resource)
        except Exception as e:
            raise ServerErrorException(
                msg='The similarity server could not be reached or some unexpected error occurred.',
                resource=resource)

    elif not search_form.cleaned_data['descriptors_filter'] \
            and not search_form.cleaned_data['target'] \
            and not target_file:
        # Standard text-based search
        try:
            solr = Solr(settings.SOLR_URL)
            query = search_prepare_query(
                unquote(search_form.cleaned_data['query'] or ""),
                unquote(search_form.cleaned_data['filter'] or ""),
                search_form.cleaned_data['sort'],
                search_form.cleaned_data['page'],
                search_form.cleaned_data['page_size'],
                grouping=search_form.cleaned_data['group_by_pack'],
                include_facets=False)

            result = SolrResponseInterpreter(solr.select(unicode(query)))
            solr_ids = [element['id'] for element in result.docs]
            solr_count = result.num_found

            more_from_pack_data = None
            if search_form.cleaned_data['group_by_pack']:
                # If grouping option is on, store grouping info in a dictionary that we can add when serializing sounds
                more_from_pack_data = dict([(int(element['id']), [
                    element['more_from_pack'], element['pack_id'], element['pack_name']
                ]) for element in result.docs])

            return solr_ids, solr_count, None, more_from_pack_data, None, None, None
        except SolrException as e:
            if search_form.cleaned_data['filter'] is not None:
                raise BadRequestException(
                    msg='Search server error: %s (please check that your filter syntax and field '
                        'names are correct)' % e.message,
                    resource=resource)
            raise BadRequestException(msg='Search server error: %s' % e.message, resource=resource)
        except Exception as e:
            raise ServerErrorException(
                msg='The search server could not be reached or some unexpected error occurred.',
                resource=resource)

    else:
        # Combined search (there is at least one of query/filter and one of descriptors_filter/target)
        # Strategies are implemented in 'combined_search_strategies'
        strategy = getattr(combined_search_strategies, merging_strategy)
        return strategy(search_form, target_file=target_file, extra_parameters=extra_parameters)
def search(request):
    """Main sound search view.

    NOTE(review): this chunk ends inside the SolrException handler — the
    template-rendering code that normally follows is not visible here.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    filter_query_link_more_when_grouping_packs = filter_query.replace(' ', '+')

    # Generate array with information of filters (name + url to remove each one)
    filter_query_split = []
    if filter_query != "":
        for filter_str in re.findall(r'[\w-]+:\"[^\"]+', filter_query):
            filter_str = filter_str + '"'
            filter_display = filter_str.replace('"', '')
            filter_name = filter_str.split(":")[0]
            if filter_name != "duration" and filter_name != "is_geotagged":
                if filter_name == "grouping_pack":
                    val = filter_display.split(":")[1]
                    # Strip the "<pack_id>_" prefix from the value for display
                    filter_display = "pack:" + val.split("_")[1]
                filter = {
                    'name': filter_display,
                    'remove_url': filter_query.replace(filter_str, ''),
                }
                filter_query_split.append(filter)

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    sort = request.GET.get("s", None)
    sort_options = forms.SEARCH_SORT_OPTIONS_WEB

    grouping = request.GET.get("g", "1")  # Group by default
    actual_groupnig = grouping
    # If the query is filtered by pack, do not collapse sounds of the same pack (makes no sense)
    # If the query is thourhg ajax (for sources remix editing), do not collapse
    if "pack" in filter_query or request.GET.get("ajax", "") == "1":
        actual_groupnig = ""

    # Set default field weights
    id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
    tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
    description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
    username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
    pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
    original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # Parse advanced search options
    advanced = request.GET.get("advanced", "")
    # if advanced search
    if advanced == "1":
        a_tag = request.GET.get("a_tag", "")
        a_filename = request.GET.get("a_filename", "")
        a_description = request.GET.get("a_description", "")
        a_packname = request.GET.get("a_packname", "")
        a_soundid = request.GET.get("a_soundid", "")
        a_username = request.GET.get("a_username", "")

        # If none is selected use all (so other filter can be appleid)
        if a_tag or a_filename or a_description or a_packname or a_soundid or a_username != "":
            # Initialize all weights to 0
            id_weight = 0
            tag_weight = 0
            description_weight = 0
            username_weight = 0
            pack_tokenized_weight = 0
            original_filename_weight = 0
            # Set the weights of selected checkboxes
            if a_soundid != "":
                id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
            if a_tag != "":
                tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
            if a_description != "":
                description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
            if a_username != "":
                username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
            if a_packname != "":
                pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
            if a_filename != "":
                original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # ALLOW "q" empty queries
    #if search_query.strip() == ""

    sort = search_prepare_sort(sort, forms.SEARCH_SORT_OPTIONS_WEB)

    logger.info(u'Search (%s)' % json.dumps({
        'ip': get_client_ip(request),
        'query': search_query,
        'filter': filter_query,
        'username': request.user.username,
        'page': current_page,
        'sort': sort[0],
        'group_by_pack': actual_groupnig,
        'advanced': json.dumps({
            'search_in_tag': a_tag,
            'search_in_filename': a_filename,
            'search_in_description': a_description,
            'search_in_packname': a_packname,
            'search_in_soundid': a_soundid,
            'search_in_username': a_username
        }) if advanced == "1" else ""
    }))

    query = search_prepare_query(search_query,
                                 filter_query,
                                 sort,
                                 current_page,
                                 settings.SOUNDS_PER_PAGE,
                                 id_weight,
                                 tag_weight,
                                 description_weight,
                                 username_weight,
                                 pack_tokenized_weight,
                                 original_filename_weight,
                                 grouping=actual_groupnig)

    solr = Solr(settings.SOLR_URL)
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False
        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        # allsounds will contain info from all the sounds returned by bulk_query_id. This should
        # be all sounds in docs, but if solr and db are not synchronised, it might happen that there
        # are ids in docs which are not found in bulk_query_id. To avoid problems we remove elements
        # in docs that have not been loaded in allsounds.
        docs = [doc for doc in docs if doc["id"] in allsounds]
        for d in docs:
            d["sound"] = allsounds[d["id"]]
    except SolrException, e:
        logger.warning("search error: query: %s error %s" % (query, e))
        error = True
        error_text = 'There was an error while searching, is your query correct?'
def get_stream_sounds(user, time_lapse):
    """Collect recent sounds for a user's stream page/emails: sounds by the
    users they follow and sounds matching the tags they follow, all created
    within `time_lapse`.

    Returns (users_sounds, tags_sounds); each entry carries the source
    (user or tag list), the sound objects, url-quoted params for the "more"
    link, the count of additional results, and the total new count.
    """
    solr = Solr(settings.SOLR_URL)
    sort_str = search_prepare_sort("created desc", SEARCH_SORT_OPTIONS_WEB)

    #
    # USERS FOLLOWING
    #
    users_following = get_users_following(user)
    users_sounds = []
    for user_following in users_following:
        # NOTE(review): the original expression here was corrupted to
        # `"username:"******"` (looks like automated credential redaction).
        # Reconstructed from the parallel tag-following loop below —
        # TODO confirm against VCS history.
        filter_str = "username:" + user_following.username + " created:" + time_lapse
        query = search_prepare_query("", filter_str, sort_str, 1, SOLR_QUERY_LIMIT_PARAM,
                                     grouping=False, include_facets=False)
        result = SolrResponseInterpreter(solr.select(unicode(query)))
        if result.num_rows != 0:
            more_count = max(0, result.num_found - SOLR_QUERY_LIMIT_PARAM)
            # the sorting only works if done like this!
            more_url_params = [urllib.quote(filter_str), urllib.quote(sort_str[0])]
            # this is the same link but for the email has to be "quoted"
            more_url = u"?f=" + filter_str + u"&s=" + sort_str[0]
            # more_url_quoted = urllib.quote(more_url)
            sound_ids = [element['id'] for element in result.docs]
            sound_objs = sounds.models.Sound.objects.filter(id__in=sound_ids)
            new_count = more_count + len(sound_ids)
            users_sounds.append(((user_following, False), sound_objs, more_url_params,
                                 more_count, new_count))

    #
    # TAGS FOLLOWING
    #
    tags_following = get_tags_following(user)
    tags_sounds = []
    for tag_following in tags_following:
        tags = tag_following.split(" ")
        tag_filter_query = ""
        for tag in tags:
            tag_filter_query += "tag:" + tag + " "
        tag_filter_str = tag_filter_query + " created:" + time_lapse
        query = search_prepare_query("", tag_filter_str, sort_str, 1, SOLR_QUERY_LIMIT_PARAM,
                                     grouping=False, include_facets=False)
        result = SolrResponseInterpreter(solr.select(unicode(query)))
        if result.num_rows != 0:
            more_count = max(0, result.num_found - SOLR_QUERY_LIMIT_PARAM)
            # the sorting only works if done like this!
            more_url_params = [urllib.quote(tag_filter_str), urllib.quote(sort_str[0])]
            # this is the same link but for the email has to be "quoted"
            more_url = u"?f=" + tag_filter_str + u"&s=" + sort_str[0]
            # more_url_quoted = urllib.quote(more_url)
            sound_ids = [element['id'] for element in result.docs]
            sound_objs = sounds.models.Sound.objects.filter(id__in=sound_ids)
            new_count = more_count + len(sound_ids)
            tags_sounds.append((tags, sound_objs, more_url_params, more_count, new_count))

    return users_sounds, tags_sounds
def search(request):
    """Sound search view (variant with clickthrough logging).

    NOTE(review): this chunk ends inside the SolrException handler — the code
    that normally follows (rendering) is not visible here.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    filter_query_link_more_when_grouping_packs = filter_query.replace(' ', '+')

    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    sort = request.GET.get("s", None)
    sort_options = forms.SEARCH_SORT_OPTIONS_WEB

    grouping = request.GET.get("g", "1")  # Group by default
    actual_groupnig = grouping
    # If the query is filtered by pack, do not collapse sounds of the same pack (makes no sense)
    # If the query is thourhg ajax (for sources remix editing), do not collapse
    if "pack" in filter_query or request.GET.get("ajax", "") == "1":
        actual_groupnig = ""

    # Set default field weights
    id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
    tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
    description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
    username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
    pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
    original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # Parse advanced search options
    advanced = request.GET.get("advanced", "")
    # if advanced search
    if advanced == "1":
        a_tag = request.GET.get("a_tag", "")
        a_filename = request.GET.get("a_filename", "")
        a_description = request.GET.get("a_description", "")
        a_packname = request.GET.get("a_packname", "")
        a_soundid = request.GET.get("a_soundid", "")
        a_username = request.GET.get("a_username", "")

        # If none is selected use all (so other filter can be appleid)
        if a_tag or a_filename or a_description or a_packname or a_soundid or a_username != "":
            # Initialize all weights to 0
            id_weight = 0
            tag_weight = 0
            description_weight = 0
            username_weight = 0
            pack_tokenized_weight = 0
            original_filename_weight = 0
            # Set the weights of selected checkboxes
            if a_soundid != "":
                id_weight = settings.DEFAULT_SEARCH_WEIGHTS['id']
            if a_tag != "":
                tag_weight = settings.DEFAULT_SEARCH_WEIGHTS['tag']
            if a_description != "":
                description_weight = settings.DEFAULT_SEARCH_WEIGHTS['description']
            if a_username != "":
                username_weight = settings.DEFAULT_SEARCH_WEIGHTS['username']
            if a_packname != "":
                pack_tokenized_weight = settings.DEFAULT_SEARCH_WEIGHTS['pack_tokenized']
            if a_filename != "":
                original_filename_weight = settings.DEFAULT_SEARCH_WEIGHTS['original_filename']

    # ALLOW "q" empty queries
    #if search_query.strip() == ""

    sort = search_prepare_sort(sort, forms.SEARCH_SORT_OPTIONS_WEB)

    query = search_prepare_query(search_query,
                                 filter_query,
                                 sort,
                                 current_page,
                                 settings.SOUNDS_PER_PAGE,
                                 id_weight,
                                 tag_weight,
                                 description_weight,
                                 username_weight,
                                 pack_tokenized_weight,
                                 original_filename_weight,
                                 grouping=actual_groupnig)

    solr = Solr(settings.SOLR_URL)
    try:
        results = SolrResponseInterpreter(solr.select(unicode(query)))
        paginator = SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE)
        num_results = paginator.count
        non_grouped_number_of_results = results.non_grouped_number_of_matches
        page = paginator.page(current_page)
        error = False
        docs = results.docs
        resultids = [d.get("id") for d in docs]
        resultsounds = sounds.models.Sound.objects.bulk_query_id(resultids)
        allsounds = {}
        for s in resultsounds:
            allsounds[s.id] = s
        for d in docs:
            d["sound"] = allsounds[d["id"]]

        # clickusage tracking
        if settings.LOG_CLICKTHROUGH_DATA:
            request_full_path = request.get_full_path()
            # The session id of an unauthenticated user is different from the session id of the same user when
            # authenticated.
            request.session["searchtime_session_key"] = request.session.session_key
            if results.docs is not None:
                ids = []
                for item in results.docs:
                    ids.append(item["id"])
                logger_click.info("QUERY : %s : %s : %s : %s" %
                                  (unicode(request_full_path).encode('utf-8'),
                                   request.session.session_key,
                                   unicode(ids).encode('utf-8'),
                                   unicode(current_page).encode('utf-8')))
    except SolrException, e:
        logger.warning("search error: query: %s error %s" % (query, e))
        error = True
        error_text = 'There was an error while searching, is your query correct?'
def search_forum(request):
    """Forum search view: query the Solr forum index and render the results page.

    GET parameters read:
        q               -- free-text search query
        f               -- extra Solr filter query
        page            -- 1-based results page (defaults to 1 on bad input)
        forum           -- forum name slug for context-sensitive search
        advanced_search -- "1" enables the advanced options (date range)
        dt_from, dt_to  -- date range bounds in YYYY-MM-DD

    Renders 'search/search_forum.html'. Solr errors are caught and surfaced
    to the template via the 'error'/'error_text' context variables.
    """
    search_query = request.GET.get("q", "")
    filter_query = request.GET.get("f", "")
    try:
        current_page = int(request.GET.get("page", 1))
    except ValueError:
        current_page = 1
    current_forum_name_slug = request.GET.get(
        "forum", "").strip()  # for context sensitive search
    if current_forum_name_slug:
        current_forum = get_object_or_404(forum.models.Forum.objects,
                                          name_slug=current_forum_name_slug)
    else:
        current_forum = None
    sort = ["thread_created desc"]

    # Parse advanced search options. A date that fails to parse is treated as
    # "not set" and the display string falls back to the picker placeholder.
    advanced_search = request.GET.get("advanced_search", "")
    date_from = request.GET.get("dt_from", "")
    try:
        df_parsed = datetime.datetime.strptime(date_from, "%Y-%m-%d")
        date_from_display = df_parsed.strftime("%d-%m-%Y")
    except ValueError:
        date_from = ""
        date_from_display = "Choose a Date"
    date_to = request.GET.get("dt_to", "")
    try:
        dt_parsed = datetime.datetime.strptime(date_to, "%Y-%m-%d")
        date_to_display = dt_parsed.strftime("%d-%m-%Y")
    except ValueError:
        date_to = ""
        date_to_display = "Choose a Date"

    # Workaround: discard placeholder queries coming from the search box.
    if search_query.startswith("search in"):
        search_query = ""

    error = False
    error_text = ""
    paginator = None
    num_results = None
    page = None
    results = []
    if search_query.strip() != "" or filter_query:
        # add current forum
        if current_forum:
            filter_query += "forum_name_slug:" + current_forum.name_slug

        # add date range
        # FIX: parenthesized the 'or'. The previous condition
        # `advanced_search == "1" and date_from != "" or date_to != ""`
        # parsed as `(… and …) or date_to != ""`, so setting dt_to applied
        # the date filter even with advanced search disabled.
        if advanced_search == "1" and (date_from != "" or date_to != ""):
            filter_query = __add_date_range(filter_query, date_from, date_to)

        query = SolrQuery()
        query.set_dismax_query(search_query,
                               query_fields=[("thread_title", 4),
                                             ("post_body", 3),
                                             ("thread_author", 3),
                                             ("post_author", 3),
                                             ("forum_name", 2)])
        query.set_highlighting_options_default(
            field_list=["post_body"],
            fragment_size=200,
            alternate_field="post_body",  # TODO: revise this param
            require_field_match=False,
            pre="<strong>",
            post="</strong>")
        query.set_query_options(
            start=(current_page - 1) * settings.SOUNDS_PER_PAGE,
            rows=settings.SOUNDS_PER_PAGE,
            field_list=[
                "id", "forum_name", "forum_name_slug", "thread_id",
                "thread_title", "thread_author", "thread_created",
                "post_body", "post_author", "post_created", "num_posts"
            ],
            filter_query=filter_query,
            sort=sort)
        # Group hits by thread so one thread does not flood the page.
        query.set_group_field("thread_title_grouped")
        query.set_group_options(group_limit=30)

        solr = Solr(settings.SOLR_FORUM_URL)
        try:
            results = SolrResponseInterpreter(solr.select(unicode(query)))
            paginator = SolrResponseInterpreterPaginator(
                results, settings.SOUNDS_PER_PAGE)
            num_results = paginator.count
            page = paginator.page(current_page)
            error = False
        except SolrException as e:
            # Malformed query (bad filter syntax etc.) — report, keep page up.
            logger.warning("search error: query: %s error %s" % (query, e))
            error = True
            error_text = 'There was an error while searching, is your query correct?'
        except Exception as e:
            # Anything else is most likely Solr being unreachable.
            logger.error("Could probably not connect to Solr - %s" % e)
            error = True
            error_text = 'The search server could not be reached, please try again later.'

    tvars = {
        'advanced_search': advanced_search,
        'current_forum': current_forum,
        'current_page': current_page,
        'date_from': date_from,
        'date_from_display': date_from_display,
        'date_to': date_to,
        'date_to_display': date_to_display,
        'error': error,
        'error_text': error_text,
        'filter_query': filter_query,
        'num_results': num_results,
        'page': page,
        'paginator': paginator,
        'search_query': search_query,
        'sort': sort,
        'results': results,
    }
    return render(request, 'search/search_forum.html', tvars)
def search_forum(request): search_query = request.GET.get("q", "") filter_query = request.GET.get("f", "") try: current_page = int(request.GET.get("page", 1)) except ValueError: current_page = 1 current_forum_name_slug = request.GET.get("current_forum_name_slug", "").strip() # for context sensitive search current_forum_name = request.GET.get("current_forum_name", "").strip() # used in breadcrumb sort = ["thread_created desc"] # Parse advanced search options advanced_search = request.GET.get("advanced_search", "") date_from = request.GET.get("dt_from", "") date_to = request.GET.get("dt_to", "") # TEMPORAL WORKAROUND!!! to prevent using watermark as the query for forum search... # It only happens in some situations. if "search in " in search_query: invalid = 1 if search_query.strip() != "" or filter_query: # add current forum if current_forum_name_slug.strip() != "": filter_query += "forum_name_slug:" + current_forum_name_slug # add date range if advanced_search == "1" and date_from != "" or date_to != "": filter_query = __add_date_range(filter_query, date_from, date_to) query = SolrQuery() query.set_dismax_query(search_query, query_fields=[("thread_title", 4), ("post_body", 3), ("thread_author", 3), ("post_author", 3), ("forum_name", 2)]) query.set_highlighting_options_default(field_list=["post_body"], fragment_size=200, alternate_field="post_body", # TODO: revise this param require_field_match=False, pre="<strong>", post="</strong>") query.set_query_options(start=(current_page - 1) * settings.SOUNDS_PER_PAGE, rows=settings.SOUNDS_PER_PAGE, field_list=["id", "forum_name", "forum_name_slug", "thread_id", "thread_title", "thread_author", "thread_created", "post_body", "post_author", "post_created", "num_posts"], filter_query=filter_query, sort=sort) query.set_group_field("thread_title_grouped") query.set_group_options(group_limit=30) solr = Solr(settings.SOLR_FORUM_URL) try: results = SolrResponseInterpreter(solr.select(unicode(query))) paginator = 
SolrResponseInterpreterPaginator(results, settings.SOUNDS_PER_PAGE) num_results = paginator.count page = paginator.page(current_page) error = False except SolrException, e: logger.warning("search error: query: %s error %s" % (query, e)) error = True error_text = 'There was an error while searching, is your query correct?' except Exception, e: logger.error("Could probably not connect to Solr - %s" % e) error = True error_text = 'The search server could not be reached, please try again later.'