def test_add_and_delete(self):
    """A saved document is counted in the index; a deleted one is not."""
    def indexed_documents():
        # Build a fresh search for every check.
        return elasticutils.S(Document).count()

    wiki_doc = document(save=True)
    self.refresh()
    eq_(indexed_documents(), 1)

    wiki_doc.delete()
    self.refresh()
    eq_(indexed_documents(), 0)
def test_question_no_answers_deleted(self):
    """Deleting a question that has no answers drops the index count to 0."""
    def indexed_questions():
        # Build a fresh search for every check.
        return elasticutils.S(Question).count()

    eq_(indexed_questions(), 0)

    unanswered = question(save=True)
    self.refresh()
    eq_(indexed_questions(), 1)

    unanswered.delete()
    self.refresh()
    eq_(indexed_questions(), 0)
def get_doctype_stats():
    """Return a dict mapping each doctype name to its indexed count.

    For example:

    >>> get_doctype_stats()
    {'questions': 1000, 'forums': 1000, 'wiki': 1000}

    :throws pyes.urllib3.MaxRetryError: if it can't connect to
        elasticsearch
    :throws pyes.exceptions.IndexMissingException: if the index doesn't
        exist

    """
    # TODO: These imports have to live inside the function, otherwise
    # there is an import loop es_utils -> models.py -> es_utils. This
    # should get fixed by having the models register themselves as
    # indexable with es_utils or something like that. Then es_utils
    # won't have to explicitly know about models.
    from forums.models import Thread
    from questions.models import Question
    from wiki.models import Document

    indexed_models = [
        ('questions', Question),
        ('forums', Thread),
        ('wiki', Document),
    ]
    # One count query per model, keyed by its public name.
    return dict(
        (label, elasticutils.S(model_cls).count())
        for label, model_cls in indexed_models)
def test_question_one_answer_deleted(self):
    """Deleting an answer keeps the count at 1; deleting the question
    drops it to 0.
    """
    def indexed_questions():
        # Build a fresh search for every check.
        return elasticutils.S(Question).count()

    eq_(indexed_questions(), 0)

    parent = question(save=True)
    reply = answer(question=parent, save=True)
    self.refresh()

    # A question and its answers are indexed as one document, so the
    # count is 1 rather than 2.
    eq_(indexed_questions(), 1)

    reply.delete()
    self.refresh()
    eq_(indexed_questions(), 1)

    parent.delete()
    self.refresh()
    eq_(indexed_questions(), 0)
def get(self, request):
    """Return JSON autocomplete suggestions for the ``term`` query parameter.

    The lower-cased term is matched against the ``name`` and
    ``short_code`` fields (both the ``__match`` and ``_value`` variants,
    OR-ed together) of the "reporter" doctype in the requesting user's
    index. At most 50 hits are serialised, each as
    ``{"id": <short_code>, "label": self.get_label(<hit>)}``.

    :param request: the incoming request; ``request.GET`` and
        ``request.user`` are read.
    :returns: an ``HttpResponse`` whose body is the JSON list.
    """
    # ``request.GET["term"]`` raises when the parameter is absent, which
    # made the ``or ""`` fallback unreachable. ``.get`` lets a missing
    # parameter behave like an empty search term.
    search_text = lower(request.GET.get("term") or "")
    database_name = get_database_name(request.user)

    criteria = {
        'name__match': search_text,
        'name_value': search_text,
        'short_code__match': search_text,
        'short_code_value': search_text,
    }
    query = elasticutils.S().es(urls=ELASTIC_SEARCH_URL,
                                timeout=ELASTIC_SEARCH_TIMEOUT)
    query = query.indexes(database_name).doctypes("reporter")
    query = query.query(or_=criteria).values_dict()

    # Never serialise more than 50 suggestions.
    limit = min(query.count(), 50)
    resp = [{"id": hit["short_code"], "label": self.get_label(hit)}
            for hit in query[:limit]]
    return HttpResponse(json.dumps(resp))
def update_submission_search_for_datasender_edition(dbm, short_code, ds_name):
    """Write ``ds_name`` into the indexed submissions of one data sender.

    Submissions are looked up in the doctypes of every project returned by
    ``get_all_projects(dbm, short_code)``, filtered on the data sender id
    ``_value`` field being ``short_code``. Each hit then has its data
    sender name field updated through ``SubmissionIndexUpdateHandler``.
    """
    datasender_filter = {
        "%s%s" % (SubmissionIndexConstants.DATASENDER_ID_KEY, "_value"):
            short_code,
    }
    updated_fields = {SubmissionIndexConstants.DATASENDER_NAME_KEY: ds_name}

    form_model_ids = []
    for project in get_all_projects(dbm, short_code):
        form_model_ids.append(project.id)

    search = elasticutils.S().es(urls=ELASTIC_SEARCH_URL,
                                 timeout=ELASTIC_SEARCH_TIMEOUT)
    search = search.indexes(dbm.database_name).doctypes(*form_model_ids)

    # The slice length is the count of the *unfiltered* search; presumably
    # this is meant to make sure every filtered hit is returned.
    total = search.count()
    matching = search[:total].filter(**datasender_filter)

    for hit in matching.values_dict('void'):
        handler = SubmissionIndexUpdateHandler(dbm.database_name, hit._type)
        handler.update_field_in_submission_index(hit._id, updated_fields)
def test_deleted(self):
    """Deleting a thread brings the indexed thread count back to 0."""
    def indexed_threads():
        # Build a fresh search for every check.
        return elasticutils.S(Thread).count()

    fresh_thread = thread()
    eq_(indexed_threads(), 0)

    # Saving the new thread is what creates its document in the index.
    fresh_thread.save()
    self.refresh()
    eq_(indexed_threads(), 1)

    reply = post(thread=fresh_thread)
    eq_(indexed_threads(), 1)

    reply.save()
    self.refresh()
    eq_(indexed_threads(), 1)

    fresh_thread.delete()
    self.refresh()
    eq_(indexed_threads(), 0)
def test_added(self):
    """Saving a question indexes one document; adding an answer keeps it
    at one.
    """
    def indexed_questions():
        # Build a fresh search for every check.
        return elasticutils.S(Question).count()

    eq_(indexed_questions(), 0)

    parent = question(save=True)
    self.refresh()
    eq_(indexed_questions(), 1)

    reply = answer(question=parent)
    self.refresh()
    eq_(indexed_questions(), 1)

    reply.save()
    self.refresh()

    # A new answer does not get a document of its own in the index, so
    # the count is still 1.
    #
    # TODO: This is ambiguous: an unchanged count cannot tell apart "the
    # question document was updated correctly" from "the post_save hook
    # never ran". Need a better test.
    eq_(indexed_questions(), 1)
def process_by_chunk_questionnaire(dbm, project_form_model_ids,
                                   fields_mapping, kwargs):
    """Apply ``fields_mapping`` to every indexed submission matching
    ``kwargs``.

    The search covers the doctypes named in ``project_form_model_ids``,
    which must contain fewer than ``ES_NUMBER_OF_TYPES_SUPPORTED`` entries.
    Each hit is updated through ``SubmissionIndexUpdateHandler``.
    """
    assert len(project_form_model_ids) < ES_NUMBER_OF_TYPES_SUPPORTED

    search = elasticutils.S().es(urls=ELASTIC_SEARCH_URL,
                                 timeout=ELASTIC_SEARCH_TIMEOUT)
    search = search.indexes(dbm.database_name)
    search = search.doctypes(*project_form_model_ids)

    # The slice length is the count of the *unfiltered* search; presumably
    # this is meant to make sure every filtered hit is returned.
    total = search.count()
    matching = search[:total].filter(**kwargs)

    for hit in matching.values_dict('void'):
        handler = SubmissionIndexUpdateHandler(dbm.database_name, hit._type)
        handler.update_field_in_submission_index(hit._id, fields_mapping)
def test_added(self):
    """Saving a thread indexes one document; saving a post in it keeps it
    at one.
    """
    def indexed_threads():
        # Build a fresh search for every check.
        return elasticutils.S(Thread).count()

    fresh_thread = thread()
    eq_(indexed_threads(), 0)

    # Saving the new thread is what creates its document in the index.
    fresh_thread.save()
    self.refresh()
    eq_(indexed_threads(), 1)

    reply = post(thread=fresh_thread)
    eq_(indexed_threads(), 1)

    reply.save()
    self.refresh()

    # A new post does not get a document of its own in the index, so the
    # count is still 1.
    #
    # TODO: This is ambiguous: an unchanged count cannot tell apart "the
    # thread document was updated correctly" from "the post_save hook
    # never ran". Need a better test.
    eq_(indexed_threads(), 1)
def _check_if_questionnaire_has_submissions_with_unique_id(
        manager, project, unique_id):
    """Return the ``status`` values of at most one matching submission.

    The search runs on the ``project.id`` doctype of the manager's index
    and is sliced to a single hit. One filter per entity question of the
    project is chained onto it, each requiring that question's ES field to
    equal ``unique_id``. An empty list means nothing matched.
    """
    field_names = []
    for field in project.entity_questions:
        field_names.append(_get_unique_id_es_field_name(field, project.id))

    search = elasticutils.S().es(urls=ELASTIC_SEARCH_URL,
                                 timeout=ELASTIC_SEARCH_TIMEOUT)
    search = search.indexes(manager.database_name).doctypes(project.id)
    search = search[:1]

    for es_field in field_names:
        search = search.filter(**{es_field: unique_id})

    return list(search.values_list('status'))
def subject_autocomplete(request, entity_type):
    """Return JSON autocomplete suggestions for subjects of ``entity_type``.

    The lower-cased ``term`` query parameter is matched against the ES
    fields derived from the form model's ``name`` and ``short_code``
    fields (both the ``__match`` and ``_value`` variants, OR-ed together)
    in the lower-cased ``entity_type`` doctype of the requesting user's
    index. At most 50 hits are serialised, each as
    ``{"id": <short code value>, "label": <name value>}``.

    :param request: the incoming request; ``request.GET`` and
        ``request.user`` are read.
    :param entity_type: the subject type to search.
    :returns: an ``HttpResponse`` whose body is the JSON list.
    """
    # ``request.GET["term"]`` raises when the parameter is absent, which
    # made the ``or ""`` fallback unreachable. ``.get`` lets a missing
    # parameter behave like an empty search term.
    search_text = lower(request.GET.get("term") or "")
    database_name = get_database_name(request.user)
    dbm = get_database_manager(request.user)
    form_model = get_form_model_by_entity_type(dbm, [entity_type.lower()])

    subject_name_field = get_field_by_attribute_value(
        form_model, 'name', 'name')
    es_field_name_for_subject_name = es_questionnaire_field_name(
        subject_name_field.code, form_model.id)
    subject_short_code_field = get_field_by_attribute_value(
        form_model, 'name', 'short_code')
    es_field_name_for_short_code = es_questionnaire_field_name(
        subject_short_code_field.code, form_model.id)

    criteria = {
        es_field_name_for_subject_name + '__match': search_text,
        es_field_name_for_subject_name + '_value': search_text,
        es_field_name_for_short_code + '__match': search_text,
        es_field_name_for_short_code + '_value': search_text,
    }
    query = elasticutils.S().es(urls=ELASTIC_SEARCH_URL,
                                timeout=ELASTIC_SEARCH_TIMEOUT)
    query = query.indexes(database_name).doctypes(lower(entity_type))
    query = query.query(or_=criteria).values_dict()

    # Never serialise more than 50 suggestions.
    limit = min(query.count(), 50)
    resp = [{"id": hit[es_field_name_for_short_code],
             "label": hit[es_field_name_for_subject_name]}
            for hit in query[:limit]]
    return HttpResponse(json.dumps(resp))
def _check_if_questionnaire_has_submissions_with_unique_id(
        manager, project, unique_id, entity_type):
    """Return one field's values for at most one matching indexed document.

    For an entity registration form the search runs on the
    ``project.entity_type[0]`` doctype and lists ``void``; otherwise it
    runs on the ``project.id`` doctype and lists ``status``. The search is
    sliced to a single hit, and one filter per question returned by
    ``project.get_questions_for_entity([entity_type])`` is chained onto
    it, each requiring that question's ES field to equal ``unique_id``.
    An empty list means nothing matched.
    """
    field_names = []
    for field in project.get_questions_for_entity([entity_type]):
        field_names.append(
            _get_unique_id_es_field_name(
                field, project.id, project.is_entity_registration_form()))

    if project.is_entity_registration_form():
        doc_type = project.entity_type[0]
    else:
        doc_type = project.id

    search = elasticutils.S().es(urls=ELASTIC_SEARCH_URL,
                                 timeout=ELASTIC_SEARCH_TIMEOUT)
    search = search.indexes(manager.database_name).doctypes(doc_type)
    search = search[:1]

    for es_field in field_names:
        search = search.filter(**{es_field: unique_id})

    if project.is_entity_registration_form():
        listed_field = 'void'
    else:
        listed_field = 'status'
    return list(search.values_list(listed_field))
def symfony(inp):
    """Answer an ``!sf`` lookup with links to the best-matching docs.

    ``inp`` may start with a ``[category]`` prefix (lower-case letters
    only). When present, it is removed from the search term and used as a
    prefix query on the ``category`` field. The remaining text is matched
    against ``tags``, ``title`` and ``content``, and at most three hits
    are fetched. Only hits whose score is within 0.5 of the top score are
    linked.

    :param inp: the raw text following the command.
    :returns: a reply string: the usage text, an "index unavailable"
        notice, a "no results" notice, or the comma-separated URLs.
    """
    # Named ``usage`` so the builtin ``help`` is not shadowed.
    usage = "The syntax is: !sf [category] <search term> - Omitted category matches all"

    if not elasticutils.get_es().indices.exists('doc-index'):
        return "Index currently unavailable. Try again in a bit."

    if '' == inp:
        return usage

    # Raw string: ``\[`` is not a valid escape in a normal string literal.
    category_match = re.match(r'\[[a-z]+\]', inp)
    search = elasticutils.S().indexes('doc-index').doctypes('doc-section-type')

    if category_match:
        bracketed = category_match.group()
        inp = inp.replace(bracketed, '').strip()
        if '' == inp:
            return usage
        # The pattern guarantees the form "[letters]", so dropping the
        # first and last characters leaves the bare category name.
        search = search.query(category__prefix=bracketed[1:-1])

    # cant fit more than 3 links into 1 irc message
    results = search.query(tags__match=inp,
                           title__match=inp,
                           content__match=inp,
                           should=True)[:3].execute()

    if len(results) == 0:
        return "Sorry, seems like I can't help you with that."

    top_score = results.results[0]['_score']

    # Keep only hits scoring close to the best one.
    # left in for debug:
    # matches.append(str(result._score) + ' - ' + result.url)
    matches = [hit.url for hit in results if hit._score + 0.5 >= top_score]

    if len(matches) > 1:
        response_text = "These are the docs I found most relevant for you: %s"
    else:
        response_text = "This is what I found most relevant for you: %s"

    return response_text % ', '.join(matches)
def get_query(self, database_name, *doc_type):
    """Build a search on index ``database_name`` limited to ``doc_type``.

    Every positional argument after ``database_name`` is passed on as a
    doctype. The search is only built here, not executed.
    """
    search = elasticutils.S().es(urls=ELASTIC_SEARCH_URL)
    search = search.indexes(database_name)
    return search.doctypes(*doc_type)
import elasticutils
from datawinners.settings import ELASTIC_SEARCH_URL

# Ad-hoc script: print the facet counts of two "_value" fields for the
# documents of one hard-coded index/doctype whose ds_name matches "Tester".
basic_es = elasticutils.S().es(urls=ELASTIC_SEARCH_URL).indexes(
    "hni_testorg_slx364903").doctypes("bd20c0ee622b11e3acca001c42a6c505")
# basic_es=basic_es.query(bd20c0ee622b11e3acca001c42a6c505_q4='bad')
basic_es = basic_es.query_raw({"match": {
    "ds_name": "Tester",
}})

s = basic_es.facet("bd20c0ee622b11e3acca001c42a6c505_q2_value")
s = s.facet("bd20c0ee622b11e3acca001c42a6c505_q3_value")

# Parenthesised single-argument form prints the same on Python 2 and is
# also valid on Python 3, unlike the bare ``print`` statement.
print(s.facet_counts())
def _get_submissions_for_unique_id_entry(args, dbm, project):
    """Return a search for the project's submissions filtered by ``args``.

    The search targets the ``project.id`` doctype of ``dbm``'s index. It
    is sliced to the size of the unfiltered result count before the
    filter is applied, and is returned without iterating its hits.
    """
    search = elasticutils.S().es(urls=ELASTIC_SEARCH_URL,
                                 timeout=ELASTIC_SEARCH_TIMEOUT)
    search = search.indexes(dbm.database_name).doctypes(project.id)
    total = search.count()
    return search[:total].filter(**args)
def get_unregistered_datasenders(dbm, questionnaire_id):
    """List the ``ds_name_exact`` facet terms of anonymous submissions.

    Only documents of doctype ``questionnaire_id`` with
    ``is_anonymous=True``, ``ds_id='n/a'`` and ``void=False`` contribute
    to the (filtered) facet.
    """
    search = elasticutils.S().es(urls=ELASTIC_SEARCH_URL,
                                 timeout=ELASTIC_SEARCH_TIMEOUT)
    search = search.indexes(dbm.database_name).doctypes(questionnaire_id)
    anonymous = search.filter(is_anonymous=True, ds_id='n/a', void=False)

    facet_counts = anonymous.facet('ds_name_exact', filtered=True).facet_counts()

    names = []
    for entry in facet_counts['ds_name_exact']:
        names.append(entry['term'])
    return names
def _create_elastic_search_query(entity_type, dbm, response_limit):
    """Build a search on the ``entity_type`` doctype of ``dbm``'s index,
    sliced to the first ``response_limit`` results.
    """
    search = elasticutils.S().es(urls=ELASTIC_SEARCH_URL,
                                 timeout=ELASTIC_SEARCH_TIMEOUT)
    search = search.indexes(dbm.database_name).doctypes(entity_type)
    return search[0:response_limit]
def get_query(self, database_name, doc_type):
    """Build a search on ``doc_type`` in index ``database_name`` that is
    filtered to documents with ``void=False``.
    """
    search = elasticutils.S().es(urls=ELASTIC_SEARCH_URL,
                                 timeout=ELASTIC_SEARCH_TIMEOUT)
    search = search.indexes(database_name).doctypes(doc_type)
    return search.filter(void=False)
def get_non_deleted_submission_count(dbm, questionnaire_id):
    """Count the documents of doctype ``questionnaire_id`` in ``dbm``'s
    index whose ``void`` field is False.
    """
    search = elasticutils.S().es(urls=ELASTIC_SEARCH_URL,
                                 timeout=ELASTIC_SEARCH_TIMEOUT)
    search = search.indexes(dbm.database_name).doctypes(questionnaire_id)
    not_void = search.filter(void=False)
    return not_void.count()
def _query_for_questionnaire(dbm, form_model):
    """Build a search on the ``form_model.id`` doctype of ``dbm``'s index.

    The search is only built here, not executed.
    """
    search = elasticutils.S().es(urls=ELASTIC_SEARCH_URL,
                                 timeout=ELASTIC_SEARCH_TIMEOUT)
    search = search.indexes(dbm.database_name)
    return search.doctypes(form_model.id)