def recreate_index_for_db(database_name):
    """Drop and rebuild the Elasticsearch index for one database.

    Deletes any existing index (best effort), creates a fresh
    single-shard/zero-replica index, then rebuilds all mappings and
    document indices through the database manager. Indexing failures
    are logged, never propagated.
    """
    started_at = time.time()
    handle = get_elasticsearch_handle()
    logger = logging.getLogger(database_name)
    try:
        handle.delete_index(database_name)
    except Exception as e:
        # The index may not exist yet; deletion is best-effort.
        logger.info("Could not delete index " + str(e.message))
    create_response = handle.create_index(
        database_name,
        settings={"number_of_shards": 1, "number_of_replicas": 0})
    logger.info('%s search index created : %s' % (database_name, create_response.get('ok')))
    dbm = get_db_manager(database_name)
    try:
        create_all_mappings(dbm)
        create_all_indices(dbm)
    except Exception as e:
        logger.exception("recreate index failed for database %s for" % dbm.database_name)
    logger.info(
        'Time taken (seconds) for indexing {database_name} : {timetaken}'.
        format(database_name=database_name, timetaken=(time.time() - started_at)))
def create_ds_mapping(dbm, form_model):
    """Put the ES mapping for datasenders (reporter entities).

    Extends the questionnaire fields with the synthetic 'projects',
    'groups' and 'customgroups' text fields before building the mapping.

    Fix: works on a copy of form_model.fields — the original appended the
    synthetic fields directly to the form model's own list, mutating it
    as a side effect on every call.
    """
    es = get_elasticsearch_handle()
    fields = list(form_model.fields)  # copy so the model's field list stays untouched
    fields.append(TextField(name="projects", code='projects', label='projects'))
    fields.append(TextField(name="groups", code='groups', label='My Groups'))
    fields.append(TextField(name="customgroups", code='customgroups', label='Custom groups'))
    es.put_mapping(dbm.database_name, REPORTER_ENTITY_TYPE[0],
                   get_fields_mapping(REPORTER_ENTITY_TYPE[0], fields))
def update_submission_search_index(submission_doc, dbm, refresh_index=True):
    """Index (or re-index) a single survey response in Elasticsearch.

    Builds the search document from the submission's meta fields plus its
    questionnaire answers, then writes it under the form model's doc type.
    """
    handle = get_elasticsearch_handle()
    model = get_form_model_by_code(dbm, submission_doc.form_code)
    doc = _meta_fields(submission_doc, dbm)
    _update_with_form_model_fields(dbm, submission_doc, doc, model)
    handle.index(dbm.database_name, model.id, doc,
                 id=submission_doc.id, refresh=refresh_index)
def create_search_indices_for_deleted_datasender(db_name):
    """Migration: bulk-index voided (deleted) datasenders for one database.

    Collects every reporter entity that is voided — or whose short code is
    'test' — and bulk-indexes them, then marks the migration successful.
    All failures are logged and swallowed at the outer boundary.
    """
    logger = logging.getLogger(db_name)
    try:
        logger.info('Starting indexing')
        dbm = get_db_manager(db_name)
        # Long timeout: bulk indexing a whole database can be slow.
        es = get_elasticsearch_handle(timeout=600)
        form_model = get_form_model_by_code(dbm, REGISTRATION_FORM_CODE)
        datasenders = []
        for entity in get_all_entities_include_voided(dbm, REPORTER_ENTITY_TYPE):
            if not entity.data:
                continue
            # NOTE(review): also indexes the special 'test' datasender even
            # when not voided — presumably intentional; confirm.
            if entity.is_void() or entity.short_code == 'test':
                datasender_dict = _create_datasender_dict(
                    dbm, entity, REPORTER, form_model)
                datasender_dict.update({'id': entity.id})
                datasenders.append(datasender_dict)
        if datasenders:
            es.bulk_index(dbm.database_name, REPORTER, datasenders)
            logger.info('Created index for datasenders with ids :' +
                        str([a.get('id') for a in datasenders]))
        logger.info('Completed Indexing')
        mark_as_successful(db_name)
    except Exception as e:
        logger.exception(e.message)
def create_index(dbm, form_model, logger):
    """Bulk-index every survey response of the given questionnaire."""
    code = form_model.form_code
    view_rows = dbm.database.iterview("surveyresponse/surveyresponse", 1000,
                                      reduce=False, include_docs=False,
                                      startkey=[code], endkey=[code, {}])
    handle = get_elasticsearch_handle(timeout=600)
    docs = []
    for view_row in view_rows:
        response = SurveyResponseDocument._wrap_row(view_row)
        entry = _meta_fields(response, dbm)
        _update_with_form_model_fields(dbm, response, entry, form_model)
        entry.update({'id': response.id})
        docs.append(entry)
    if docs:
        handle.bulk_index(dbm.database_name, form_model.id, docs)
        logger.info('Created index for survey response docs ' +
                    str([doc.get('id') for doc in docs]))
def create_datasender_mapping(dbm, form_model):
    """Put the ES mapping for datasenders, including the synthetic 'projects' field.

    Fix: uses a copy of form_model.fields — the original appended the
    synthetic field directly to the form model's own list, mutating it
    as a side effect on every call.
    """
    es = get_elasticsearch_handle()
    fields = list(form_model.fields)  # copy so the model's field list stays untouched
    fields.append(TextField(name="projects", code='projects', label='projects'))
    es.put_mapping(dbm.database_name, REPORTER_ENTITY_TYPE[0],
                   get_fields_mapping(form_model.form_code, fields))
def update_datasender_index(contact_doc, dbm):
    """Re-index a datasender contact; contacts without data are skipped."""
    handle = get_elasticsearch_handle()
    if not contact_doc.data:
        return
    reg_form = get_form_model_by_code(dbm, REGISTRATION_FORM_CODE)
    contact_dict = _create_contact_dict(dbm, contact_doc, reg_form)
    handle.index(dbm.database_name, REPORTER_ENTITY_TYPE[0], contact_dict,
                 id=contact_doc.id)
    handle.refresh(dbm.database_name)
def update_field_in_submission_index(self, document_id, fields_mapping, refresh_index=True):
    """Partially update one indexed submission document.

    Applies `fields_mapping` as a partial-document update against
    `self.index` / `self.doc_type`.
    """
    get_elasticsearch_handle().update(self.index, self.doc_type,
                                      id=document_id, doc=fields_mapping,
                                      refresh=refresh_index)
def update_submission_search_index(submission_doc, dbm, refresh_index=True):
    """Index one survey response, resolving its form model by id."""
    handle = get_elasticsearch_handle()
    model = FormModel.get(dbm, submission_doc.form_model_id)
    doc = _meta_fields(submission_doc, dbm)
    _update_with_form_model_fields(dbm, submission_doc, doc, model)
    handle.index(dbm.database_name, model.id, doc,
                 id=submission_doc.id, refresh=refresh_index)
def create_subject_mapping(dbm, form_model):
    """Create the ES mapping for a subject questionnaire's entity type.

    Field names are namespaced per form model via es_field_name.
    """
    field_defs = [
        get_field_definition(field,
                             field_name=es_field_name(field.code, form_model.id))
        for field in form_model.fields
    ]
    mapping = get_fields_mapping_by_field_def(doc_type=form_model.id,
                                              fields_definition=field_defs)
    get_elasticsearch_handle().put_mapping(dbm.database_name,
                                           form_model.entity_type[0], mapping)
def delete_organizations(modeladmin, request, queryset):
    """Admin action: permanently purge deactivated organizations.

    For each deactivated organization: purges its data, drops its CouchDB
    database and feed database, then deletes its Elasticsearch index.

    Fix: the index delete is now guarded — previously a missing/failed
    index aborted the loop and left the remaining organizations unpurged.
    """
    orgs = queryset.filter(status='Deactivated')
    for organization in orgs:
        dbm = get_database_manager_for_org(organization)
        organization.purge_all_data()
        del dbm.server[dbm.database_name]
        feed_database_name = "feed_" + dbm.database_name
        feed_dbm = feeds_db_for(feed_database_name)
        del feed_dbm.server[feed_database_name]
        es = get_elasticsearch_handle()
        try:
            es.delete_index(dbm.database_name)
        except Exception as e:
            # Best-effort: the index may already be gone.
            logging.info("Could not delete index " + str(e.message))
def handle(self, *args, **options): es = get_elasticsearch_handle() if len(args) > 0: databases_to_index = args[0:] else: databases_to_index = all_db_names() for database_name in databases_to_index: logger = logging.getLogger(database_name) recreate_index_for_db(database_name, es, logger) logger.info('Done') print 'Completed!'
def update_datasender_index(entity_doc, dbm):
    """Re-index a datasender entity under its (lower-cased) entity type."""
    handle = get_elasticsearch_handle()
    if not entity_doc.data:
        return
    kind = entity_doc.aggregation_paths['_type'][0].lower()
    reg_form = get_form_model_by_code(dbm, REGISTRATION_FORM_CODE)
    handle.index(dbm.database_name, kind,
                 _create_datasender_dict(dbm, entity_doc, kind, reg_form),
                 id=entity_doc.id)
    handle.refresh(dbm.database_name)
def update_datasender_index(contact_doc, dbm, bulk=False):
    """Index a datasender contact.

    With bulk=True, returns a bulk index operation (carrying the doc id)
    instead of writing immediately; otherwise indexes and refreshes.
    Contacts without data are skipped.
    """
    handle = get_elasticsearch_handle()
    if not contact_doc.data:
        return
    reg_form = get_form_model_by_code(dbm, REGISTRATION_FORM_CODE)
    contact_dict = _create_contact_dict(dbm, contact_doc, reg_form)
    if bulk:
        contact_dict.update({'id': contact_doc.id})
        return handle.index_op(contact_dict, index=dbm.database_name,
                               doc_type=REPORTER_ENTITY_TYPE[0],
                               id=contact_doc.id)
    handle.index(dbm.database_name, REPORTER_ENTITY_TYPE[0], contact_dict,
                 id=contact_doc.id)
    handle.refresh(dbm.database_name)
def entity_search_update(entity_doc, dbm):
    """Route an entity to the right index.

    Reporter entities are delegated to update_datasender_index; any other
    entity type is indexed as a subject under its lower-cased type name.
    """
    if entity_doc.aggregation_paths['_type'] == REPORTER_ENTITY_TYPE:
        update_datasender_index(entity_doc, dbm)
        return
    handle = get_elasticsearch_handle()
    if entity_doc.data:
        kind = entity_doc.aggregation_paths['_type'][0].lower()
        model = get_form_model_by_entity_type(dbm, [kind])
        handle.index(dbm.database_name, kind,
                     subject_dict(kind, entity_doc, dbm, model),
                     id=entity_doc.id)
        handle.refresh(dbm.database_name)
def delete_organizations(modeladmin, request, queryset):
    """Admin action: purge deactivated / pending-activation organizations.

    Drops each organization's CouchDB database and feed database, then
    best-effort deletes its Elasticsearch index.
    """
    deletable = queryset.filter(status__in=['Deactivated', "Pending Activation"])
    for org in deletable:
        manager = get_database_manager_for_org(org)
        org.purge_all_data()
        del manager.server[manager.database_name]
        feed_database_name = "feed_" + manager.database_name
        feed_manager = feeds_db_for(feed_database_name)
        del feed_manager.server[feed_database_name]
        handle = get_elasticsearch_handle()
        try:
            handle.delete_index(manager.database_name)
        except Exception as e:
            # Best-effort: the index may already be gone.
            logging.info("Could not delete index " + str(e.message))
def populate_contact_index(dbm):
    """Rebuild the contact index, flushing in batches of ES_INDEX_RECREATION_BATCH."""
    view_rows = dbm.database.iterview('datasender_by_mobile/datasender_by_mobile',
                                      100, reduce=False, include_docs=True)
    handle = get_elasticsearch_handle()
    pending = []
    for view_row in view_rows:
        contact = Contact.__document_class__.wrap(view_row.get('doc'))
        op = contact_search_update(contact, dbm, bulk=True)
        if op is not None:
            pending.append(op)
        if len(pending) == settings.ES_INDEX_RECREATION_BATCH:
            handle.bulk(pending)
            pending = []
    # Flush whatever is left of the final, partial batch.
    if len(pending) > 0:
        handle.bulk(pending)
def add_custom_group_field_to_data_sender_mapping(db_name):
    """Migration: add the 'customgroups' text field to the datasender mapping."""
    logger = logging.getLogger(db_name)
    logger.info('Starting Migration')
    custom_group_fields = [
        TextField(name="customgroups", code='customgroups', label='Custom groups')
    ]
    get_elasticsearch_handle().put_mapping(
        db_name, 'reporter', get_fields_mapping('reg', custom_group_fields))
    logger.info('Completed Migration')
    mark_as_completed(db_name)
def recreate_index_for_questionnaire(database_name, form_code):
    """Rebuild the submission index for one questionnaire in a database.

    Drops the questionnaire's mapping, recreates it from the current form
    model, and re-populates the submission index. Failures are logged,
    never propagated.

    Fixes: the original referenced an undefined `logger` (NameError on
    first use); it is now defined per database, consistent with
    recreate_index_for_db. The unused local ES handle was removed.
    """
    start = time.time()
    logger = logging.getLogger(database_name)
    dbm = get_db_manager(database_name)
    try:
        form_model = get_form_model_by_code(dbm, form_code)
        delete_mapping(database_name, form_model.id)
        create_mapping_for_form_model(dbm, form_model)
        populate_submission_index(dbm, form_model.id)
    except Exception as e:
        logger.exception("recreate index failed for database %s for" % dbm.database_name)
    logger.info('Time taken (seconds) for indexing {database_name} : {timetaken}'
                .format(database_name=database_name, timetaken=(time.time() - start)))
def populate_entity_index(dbm):
    """Rebuild the entity index, flushing in batches of ES_INDEX_RECREATION_BATCH.

    Fix: the original wrapped each row in `except Exception as e: raise e`,
    a no-op handler that (under Python 2) also resets the traceback. It is
    removed; exceptions now propagate with their original context, exactly
    as they effectively did before.
    """
    rows = dbm.database.iterview('by_short_codes/by_short_codes', 100,
                                 reduce=False, include_docs=True)
    actions = []
    es = get_elasticsearch_handle()
    for row in rows:
        entity = Entity.__document_class__.wrap(row.get('doc'))
        action = entity_search_update(entity, dbm, bulk=True)
        if action is not None:
            actions.append(action)
        if len(actions) == settings.ES_INDEX_RECREATION_BATCH:
            es.bulk(actions)
            actions = []
    # Flush the final, partial batch.
    if len(actions) > 0:
        es.bulk(actions)
def recreate_subject_index(db_name):
    """Migration: rebuild the subject search index for one database.

    For every subject form model: clears the existing docs of its entity
    type (best effort), recreates the mapping, then bulk-indexes all of
    the type's entities (including voided ones) that carry data.

    Errors are contained at three levels: per-entity, per-form-model, and
    for the whole database — each is logged and the run continues.
    """
    logger = logging.getLogger(db_name)
    try:
        logger.info('Starting indexing')
        dbm = get_db_manager(db_name)
        form_models = dbm.database.query(map_form_model_for_subjects)
        es = get_elasticsearch_handle()
        for row in form_models:
            try:
                form_model = FormModel.get(dbm, row.id)
                entity_type = form_model.entity_type[0]
                try:
                    # Best-effort wipe; the doc type may not exist yet.
                    es.delete_all(db_name, entity_type)
                except Exception as ignore:
                    pass
                create_subject_mapping(dbm, form_model)
                entity_docs = []
                for entity_doc in get_all_entities_include_voided(
                        dbm, [entity_type]):
                    try:
                        if entity_doc.data:
                            subject = subject_dict(entity_type, entity_doc,
                                                   dbm, form_model)
                            subject.update({'id': entity_doc.id})
                            entity_docs.append(subject)
                    except Exception as e:
                        # One bad entity must not stop the rest.
                        logger.error("Failed to index subject with id %s"
                                     % entity_doc.id)
                        logger.error(e)
                if entity_docs:
                    es.bulk_index(dbm.database_name, entity_type, entity_docs)
                    es.refresh(dbm.database_name)
                    logger.info('Changed index for subject with codes ' +
                                str([a.get('id') for a in entity_docs]))
            except Exception as e:
                # One bad form model must not stop the rest.
                logger.error("Failed to create subject mapping for %s" % row.id)
                logger.error(e.message)
        logger.info('Completed Indexing')
        mark_as_completed(db_name)
    except Exception as e:
        logger.exception(e.message)
def populate_submission_index(dbm, form_model_id=None):
    """(Re)build the submission search index.

    With form_model_id=None, recurses once per questionnaire found in the
    database. For a single questionnaire, streams its survey responses and
    bulk-indexes them in batches of ES_INDEX_RECREATION_BATCH; orphaned
    submissions (whose form model no longer exists) are counted and
    skipped, other per-row errors are counted and logged.
    """
    logger = logging.getLogger()
    if form_model_id is None:
        # Fan out: one recursive call per questionnaire id.
        questionnaires = dbm.load_all_rows_in_view(
            "surveyresponse_by_questionnaire_id", reduce=True, group=True)
        for q in questionnaires:
            logger.info('Processing questionnaire id {q}'.format(q=q.key))
            populate_submission_index(dbm, q.key)
    else:
        start = time.time()
        rows = get_survey_responses_by_form_model_id(dbm, form_model_id)
        form_model = FormModel.get(dbm, form_model_id)
        # Switch to a per-questionnaire logger for the detailed progress log.
        logger = logging.getLogger(form_model.name)
        ignored = 0
        counter = 0
        error_count = 0
        actions = []
        es = get_elasticsearch_handle()
        for row in rows:
            try:
                survey_response = SurveyResponseDocument._wrap_row(row)
                submission_action = update_submission_search_index(
                    survey_response, dbm, refresh_index=False,
                    form_model=form_model, bulk=True)
                actions.append(submission_action)
                if len(actions) == settings.ES_INDEX_RECREATION_BATCH:
                    es.bulk(actions, index=dbm.database_name,
                            doc_type=form_model.id)
                    actions = []
                counter += 1
                logger.info('No of submissions processed {counter}'.format(counter=counter))
            except FormModelDoesNotExistsException as e:
                ignored += 1
                logger.warning(e.message)  # ignore orphaned submissions On changing form code!
            except Exception as ex:
                # Count but keep going; one bad row must not stop the rebuild.
                logger.exception('Exception occurred')
                error_count += 1
        # Flush the final, partial batch.
        if len(actions) > 0:
            es.bulk(actions, index=dbm.database_name, doc_type=form_model.id)
        logger.warning("No of submissions ignored: {ignored}".format(ignored=ignored))
        logger.warning("No of submissions had errors:{errors}".format(errors=error_count))
        logger.info('Time taken (seconds) for indexing {counter} submissions of questionnaire {q} : {timetaken}'
                    .format(counter=counter, q=form_model_id, timetaken=(time.time() - start)))
def update_submission_search_index(submission_doc, dbm, refresh_index=True, form_model=None, bulk=False):
    """Index one survey response.

    The form model is looked up by id unless supplied by the caller. With
    bulk=True, returns a bulk index operation instead of writing directly.
    """
    handle = get_elasticsearch_handle()
    model = form_model if form_model is not None else FormModel.get(dbm, submission_doc.form_model_id)
    doc = _meta_fields(submission_doc, dbm)
    _update_with_form_model_fields(dbm, submission_doc, doc, model)
    if bulk:
        return handle.index_op(doc, doc_type=model.id,
                               index=dbm.database_name, id=submission_doc.id)
    handle.index(dbm.database_name, model.id, doc,
                 id=submission_doc.id, refresh=refresh_index)
def create_submission_index(dbm, row):
    """Bulk-index every survey response of the questionnaire described by `row`.

    `row` is a view row whose 'value' wraps a project (form model) document.

    Fix: the inner loop variable used to shadow the `row` parameter; it is
    renamed to `response_row` to remove the shadowing.
    """
    form_model = Project.new_from_doc(dbm, ProjectDocument.wrap(row["value"]))
    form_code = form_model.form_code
    start_key = [form_code]
    end_key = [form_code, {}]
    response_rows = dbm.database.iterview("surveyresponse/surveyresponse", 1000,
                                          reduce=False, include_docs=False,
                                          startkey=start_key, endkey=end_key)
    es = get_elasticsearch_handle(timeout=600)
    survey_response_docs = []
    for response_row in response_rows:
        survey_response = SurveyResponseDocument._wrap_row(response_row)
        search_dict = _meta_fields(survey_response, dbm)
        _update_with_form_model_fields(dbm, survey_response, search_dict, form_model)
        search_dict.update({'id': survey_response.id})
        survey_response_docs.append(search_dict)
    if survey_response_docs:
        es.bulk_index(dbm.database_name, form_model.id, survey_response_docs)
import sys

from datawinners.search.index_utils import get_elasticsearch_handle

# When executed directly as a script (not imported as part of a package),
# make the working directory importable so the absolute imports below resolve.
if __name__ == "__main__" and __package__ is None:
    sys.path.insert(0, ".")
from datawinners.main.couchdb.utils import all_db_names
from datawinners.main.management.commands.recreate_search_indexes import recreate_index_for_db
import logging
from migration.couch.utils import migrate, mark_as_completed


def create_search_indices_for_subjects(db_name):
    # Migration callback: rebuild all search indices for one database.
    logger = logging.getLogger(db_name)
    try:
        # NOTE(review): the db is marked completed BEFORE indexing runs —
        # presumably so a crash does not re-trigger the migration; confirm intent.
        mark_as_completed(db_name)
        logger.info('Starting indexing')
        # Uses the module-level `es` handle defined below (bound before
        # migrate() invokes this callback).
        recreate_index_for_db(db_name, es)
    except Exception as e:
        logger.exception("Failed DB: %s with message %s" % (db_name, e.message))
    logger.info('Completed Indexing')


# Shared ES handle for the callback above; must be bound before migrate() runs.
es = get_elasticsearch_handle()
migrate(all_db_names(), create_search_indices_for_subjects, version=(8, 0, 1), threads=1)
logger.info('Starting indexing') dbm = get_db_manager(db_name) es = get_elasticsearch_handle(timeout=600) form_model = get_form_model_by_code(dbm, REGISTRATION_FORM_CODE) datasenders = [] for entity in get_all_entities_include_voided(dbm, REPORTER_ENTITY_TYPE): if not entity.data: continue if entity.is_void() or entity.short_code == 'test': datasender_dict = _create_datasender_dict( dbm, entity, REPORTER, form_model) datasender_dict.update({'id': entity.id}) datasenders.append(datasender_dict) if datasenders: es.bulk_index(dbm.database_name, REPORTER, datasenders) logger.info('Created index for datasenders with ids :' + str([a.get('id') for a in datasenders])) logger.info('Completed Indexing') mark_as_successful(db_name) except Exception as e: logger.exception(e.message) es = get_elasticsearch_handle(timeout=600) migrate(all_db_names(), create_search_indices_for_deleted_datasender, version=(10, 0, 2), threads=1)
def create_submission_mapping(dbm, latest_form_model, old_form_model):
    """Update the stored submission mapping from the old to the latest form model."""
    store = SubmissionSearchStore(dbm, get_elasticsearch_handle(),
                                  latest_form_model, old_form_model)
    store.update_store()
def __init__(self, dbm, latest_form_model, old_form_model):
    """Capture the db manager, the old/new form models, and a fresh ES handle."""
    self.dbm = dbm
    self.latest_form_model = latest_form_model
    self.old_form_model = old_form_model
    self.es = get_elasticsearch_handle()