Example #1
    def process_bulk_docs(self, docs, progress_logger):
        if len(docs) == 0:
            return True

        pillow_logging.info("Processing batch of %s docs", len(docs))

        changes = [
            self._doc_to_change(doc) for doc in docs
            if self.process_deletes or not is_deletion(doc.get('doc_type'))
        ]
        error_collector = ErrorCollector()

        bulk_changes = build_bulk_payload(self.index_info, changes,
                                          self.doc_transform, error_collector)

        for change, exception in error_collector.errors:
            pillow_logging.error("Error procesing doc %s: %s (%s)", change.id,
                                 type(exception), exception)

        es_interface = ElasticsearchInterface(self.es)
        try:
            es_interface.bulk_ops(bulk_changes)
        except (ESBulkIndexError, ES2BulkIndexError, ES7BulkIndexError) as e:
            pillow_logging.error("Bulk index errors\n%s", e.errors)
        except Exception:
            pillow_logging.exception("\tException sending payload to ES")
            return False

        return True
Example #2
def run_query(index_name,
              q,
              debug_host=None,
              es_instance_alias=ES_DEFAULT_INSTANCE):
    # the debug_host parameter allows you to query another env for testing purposes
    if debug_host:
        if not settings.DEBUG:
            raise Exception("You can only specify an ES env in DEBUG mode")
        es_host = settings.ELASTICSEARCH_DEBUG_HOSTS[debug_host]
        es_instance = Elasticsearch([{
            'host': es_host,
            'port': settings.ELASTICSEARCH_PORT
        }],
                                    timeout=3,
                                    max_retries=0)
    else:
        es_instance = get_es_instance(es_instance_alias)

    es_interface = ElasticsearchInterface(es_instance)

    es_meta = ES_META[index_name]
    try:
        results = es_interface.search(es_meta.alias, es_meta.type, body=q)
        report_and_fail_on_shard_failures(results)
        return results
    except ElasticsearchException as e:
        raise ESError(e)
Example #3
def scan(client, query=None, scroll='5m', **kwargs):
    """
    This is a copy of elasticsearch.helpers.scan, except this function returns
    a ScanResult (which includes the total number of documents), and removes
    some options from scan that we aren't using.

    Simple abstraction on top of the
    :meth:`~elasticsearch.Elasticsearch.scroll` api - an iterator that
    yields all hits as returned by the underlying scroll requests.

    :arg client: instance of :class:`~elasticsearch.Elasticsearch` to use
    :arg query: body for the :meth:`~elasticsearch.Elasticsearch.search` api
    :arg scroll: Specify how long a consistent view of the index should be
        maintained for scrolled search

    Any additional keyword arguments will be passed to the initial
    :meth:`~elasticsearch.Elasticsearch.search` call::

        scan(es,
            query={"match": {"title": "python"}},
            index="orders-*",
            doc_type="books"
        )

    """
    kwargs['search_type'] = 'scan'
    # initial search
    es_interface = ElasticsearchInterface(client)
    initial_resp = es_interface.search(body=query, scroll=scroll, **kwargs)

    def fetch_all(initial_response):

        resp = initial_response
        scroll_id = resp.get('_scroll_id')
        if scroll_id is None:
            return
        iteration = 0

        while True:

            start = int(time.time() * 1000)
            resp = es_interface.scroll(scroll_id, scroll=scroll)
            for hit in resp['hits']['hits']:
                yield hit

            # check if we have any errors
            if resp["_shards"]["failed"]:
                logging.getLogger('elasticsearch.helpers').warning(
                    'Scroll request has failed on %d shards out of %d.',
                    resp['_shards']['failed'], resp['_shards']['total'])

            scroll_id = resp.get('_scroll_id')
            # end of scroll
            if scroll_id is None or not resp['hits']['hits']:
                break

            iteration += 1

    count = initial_resp.get("hits", {}).get("total", None)
    return ScanResult(count, fetch_all(initial_resp))
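
A minimal usage sketch for the scan helper above, assuming ScanResult unpacks
like the (count, iterator) pair it is constructed with; the index name and
query body are illustrative only:

# Hedged usage sketch: `es` is an Elasticsearch client as in the other
# examples; the index name and query are illustrative.
result = scan(es, query={"query": {"match_all": {}}}, index="my-index")
count, hits = result  # assumes ScanResult behaves like a (count, hits) pair
print("total hits:", count)
for hit in hits:
    print(hit["_id"])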
Example #4
 def setUp(self):
     self.index = TEST_INDEX_INFO.index
     self.es_alias = TEST_INDEX_INFO.alias
     self.es = get_es_new()
     self.es_interface = ElasticsearchInterface(self.es)
     with trap_extra_setup(ConnectionError):
         ensure_index_deleted(self.index)
Example #5
def _get_latest_doc_from_index(es_alias, sort_field):
    """
    Query the elasticsearch index, sorted descending by the sort field,
    and get the doc_id back so we can then do a rev-update check.

    This is because there's no way to know ahead of time what's inside the
    report* index, so just get it directly from the index and run the modify
    check workflow.
    """
    recent_query = {
        "filter": {
            "match_all": {}
        },
        "sort": {
            sort_field: "desc"
        },
        "size": 1
    }
    es_interface = ElasticsearchInterface(get_es_new())

    try:
        res = es_interface.search(es_alias, body=recent_query)
        if res.get('hits', {}).get('hits'):
            result = res['hits']['hits'][0]
            return result['_source']['_id']

    except Exception as ex:
        logging.error("Error querying get_latest_doc_from_index[%s]: %s" %
                      (es_alias, ex))
        return None
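
The helper returns a single doc id (or None), so a caller can feed it straight
into a rev-update check. A hedged usage sketch; the alias and sort field below
are illustrative only:

# Hedged usage sketch; alias and sort field are illustrative.
doc_id = _get_latest_doc_from_index('report_xforms', 'received_on')
if doc_id is not None:
    print('most recent doc:', doc_id)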
Example #6
    def handle(self, **options):
        es = get_es_new()
        es_interface = ElasticsearchInterface(es)
        # call this before getting existing indices because apparently getting the pillow will create the index
        # if it doesn't exist
        # fixme: this can delete real indices if a reindex is in progress
        found_indices = set(es_interface.get_aliases().keys())
        expected_indices = {
            info.index
            for info in get_all_expected_es_indices()
        }
        print(expected_indices)

        if options['verbose']:
            if expected_indices - found_indices:
                print('the following indices were not found:\n{}\n'.format(
                    '\n'.join(expected_indices - found_indices)))
            print('expecting {} indices:\n{}\n'.format(
                len(expected_indices), '\n'.join(sorted(expected_indices))))

        unref_indices = found_indices - expected_indices
        if unref_indices:
            if options['delete']:
                _delete_indices(es, unref_indices)
            else:
                _close_indices(es, unref_indices, options['noinput'])
        else:
            print('no indices need pruning')
Example #7
    def process_bulk_docs(self, docs, progress_logger):
        if not docs:
            return True

        pillow_logging.info("Processing batch of %s docs", len(docs))
        changes = []
        for doc in docs:
            change = self._doc_to_change(doc)  # de-dupe the is_deletion check
            if self.process_deletes or not change.deleted:
                changes.append(change)
        error_collector = ErrorCollector()

        bulk_changes = build_bulk_payload(changes, self.doc_transform,
                                          error_collector)

        for change, exception in error_collector.errors:
            pillow_logging.error("Error processing doc %s: %s (%s)", change.id,
                                 type(exception), exception)

        es_interface = ElasticsearchInterface(self.es)
        try:
            es_interface.bulk_ops(self.index_info.alias, self.index_info.type,
                                  bulk_changes)
        except BulkIndexError as e:
            pillow_logging.error("Bulk index errors\n%s", e.errors)
        except Exception as exc:
            pillow_logging.exception(
                "Error sending bulk payload to Elasticsearch: %s", exc)
            return False

        return True
Example #8
    def setUp(self):
        self.es = get_es_new()
        self.es_interface = ElasticsearchInterface(self.es)
        self.index = TEST_INDEX_INFO.index

        with trap_extra_setup(ConnectionError):
            ensure_index_deleted(self.index)
            initialize_index_and_mapping(self.es, TEST_INDEX_INFO)
Example #9
 def tearDownClass(cls):
     interface = ElasticsearchInterface(cls.es)
     for form in cls.forms:
         interface.delete_doc(XFORM_INDEX_INFO.alias, XFORM_INDEX_INFO.type,
                              form.wrapped_form.form_id)
     cls.es.indices.refresh(XFORM_INDEX_INFO.index)
     cls.forms = []
     super(XFormESTestCase, cls).tearDownClass()
Example #10
def send_to_elasticsearch(index, doc_type, doc_id, es_getter, name, data=None,
                          retries=MAX_RETRIES, propagate_failure=settings.UNIT_TESTING,
                          update=False, delete=False, es_merge_update=False):
    """
    More fault tolerant es.put method
    kwargs:
        es_merge_update: Set this to True to use Elasticsearch.update instead of Elasticsearch.index
            which merges existing ES doc and current update. If this is set to False, the doc will be replaced

    """
    data = data if data is not None else {}
    current_tries = 0
    es_interface = ElasticsearchInterface(es_getter())
    retries = 1 if settings.UNIT_TESTING else retries
    while current_tries < retries:
        try:
            if delete:
                es_interface.delete_doc(index, doc_type, doc_id)
            elif update:
                params = {'retry_on_conflict': 2}
                if es_merge_update:
                    es_interface.update_doc_fields(index, doc_type, doc_id, fields=data, params=params)
                else:
                    es_interface.update_doc(index, doc_type, doc_id, doc=data, params=params)
            else:
                es_interface.create_doc(index, doc_type, doc_id, doc=data)
            break
        except ConnectionError as ex:
            current_tries += 1
            pillow_logging.error("[{}] put_robust error {} attempt {}/{}".format(
                name, ex, current_tries, retries))

            if current_tries == retries:
                message = "[{}] Max retry error on {}/{}/{}:\n\n{}".format(
                    name, index, doc_type, doc_id, traceback.format_exc())
                if propagate_failure:
                    raise PillowtopIndexingError(message)
                else:
                    pillow_logging.error(message)

            time.sleep(math.pow(RETRY_INTERVAL, current_tries))
        except RequestError:
            error_message = (
                "Pillowtop put_robust error [{}]:\n\n{}\n\tpath: {}/{}/{}\n\t{}".format(
                    name, traceback.format_exc(), index, doc_type, doc_id, list(data))
            )

            if propagate_failure:
                raise PillowtopIndexingError(error_message)
            else:
                pillow_logging.error(error_message)
            break
        except ConflictError:
            break  # ignore the error if a doc already exists when trying to create it in the index
        except NotFoundError:
            break
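
The function above has three mutually exclusive modes (create, update, delete),
selected via keyword flags. A hedged usage sketch; the index, doc_type, id,
pillow name and data values are illustrative, not taken from the codebase:

# Hedged usage sketch; all values below are illustrative.
send_to_elasticsearch(
    index='users', doc_type='user', doc_id='abc123',
    es_getter=get_es_new, name='UserPillow',
    data={'username': 'jdoe'},
    update=True, es_merge_update=True)  # merge into the existing ES doc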
Example #11
def mget_query(index_name, ids):
    if not ids:
        return []

    es_interface = ElasticsearchInterface(get_es_new())
    es_meta = ES_META[index_name]
    try:
        return es_interface.get_bulk_docs(es_meta.alias, es_meta.type, ids)
    except ElasticsearchException as e:
        raise ESError(e)
Example #12
def form_ids_in_es(form_ids):
    query = {"filter": {"ids": {"values": list(form_ids)}}}
    es_interface = ElasticsearchInterface(get_es_new())
    es_meta = ES_META['forms']
    results = es_interface.search(es_meta.index, es_meta.type, query,
                                  params={'size': CHUNK_SIZE})
    if 'hits' in results:
        for hit in results['hits']['hits']:
            es_doc = hit['_source']
            yield es_doc['_id']
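
Because form_ids_in_es is a generator over the ids that actually exist in ES,
one natural use is diffing against the requested ids. A hedged sketch; the
form ids are illustrative:

# Hedged usage sketch; form_ids is an illustrative list of ids to verify.
form_ids = ['form-1', 'form-2', 'form-3']
missing = set(form_ids) - set(form_ids_in_es(form_ids))
if missing:
    print('forms not yet indexed:', missing)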
Example #13
 def _delete_docs_from_es(cls, doc_ids, index_info):
     es_interface = ElasticsearchInterface(cls.elasticsearch)
     refresh = False
     for doc_id in doc_ids:
         try:
             es_interface.delete_doc(index_info.alias, index_info.type,
                                     doc_id)
         except elasticsearch.NotFoundError:
             pass
         else:
             refresh = True
     if refresh:
         cls.elasticsearch.indices.refresh(index_info.index)
Example #14
def _check_es_rev(es_alias, doc_id, couch_revs):
    """
    Specific docid and rev checker.

    es_alias: Elasticsearch alias
    doc_id: id to query in ES
    couch_revs: list of target couch _revs to match against
    """
    es_interface = ElasticsearchInterface(get_es_new())
    doc_id_query = {
        "filter": {
            "ids": {
                "values": [doc_id]
            }
        },
        "fields": ["_id", "_rev"]
    }

    try:
        res = es_interface.search(es_alias, body=doc_id_query)
        status = False
        message = "Not in sync"

        if 'hits' in res:
            if res['hits'].get('total', 0) == 0:
                status = False
                # if the doc doesn't exist it's definitely not in sync
                message = "Not in sync %s" % es_alias
            elif 'hits' in res['hits']:
                fields = res['hits']['hits'][0]['fields']
                if fields['_rev'] in couch_revs:
                    status = True
                    message = "%s OK" % es_alias
                else:
                    status = False
                    # less likely: the doc is there but the rev is stale
                    message = "Not in sync - %s stale" % es_alias
        else:
            status = False
            message = "Not in sync - query failed"
            notify_error("%s: %s" % (message, str(res)))
    except Exception as ex:
        message = "ES Error: %s" % ex
        status = False
    return {
        es_alias: {
            "es_alias": es_alias,
            "status": status,
            "message": message
        }
    }
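
The function returns a dict keyed by the alias, as built above. A hedged
sketch of consuming it; the alias, doc id and couch revs are illustrative:

# Hedged usage sketch; alias, doc id and revs are illustrative.
result = _check_es_rev('xforms', 'some-doc-id', ['3-abc123'])
if not result['xforms']['status']:
    print(result['xforms']['message'])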
Example #15
def update_unknown_user_from_form_if_necessary(es, doc_dict):
    if doc_dict is None:
        return

    user_id, username, domain, xform_id = _get_user_fields_from_form_doc(
        doc_dict)

    if (not user_id or user_id in WEIRD_USER_IDS
            or _user_exists_in_couch(user_id)):
        return

    if not doc_exists_in_es(USER_INDEX_INFO, user_id):
        doc_type = "AdminUser" if username == "admin" else "UnknownUser"
        doc = {
            "_id": user_id,
            "domain": domain,
            "username": username,
            "first_form_found_in": xform_id,
            "doc_type": doc_type,
        }
        if domain:
            doc["domain_membership"] = {"domain": domain}
        ElasticsearchInterface(es).create_doc(USER_INDEX_INFO.alias,
                                              ES_META['users'].type,
                                              doc=doc,
                                              doc_id=user_id)
Example #16
def set_index_normal_settings(es, index):
    """
    Normal indexing configuration
    """
    from pillowtop.index_settings import INDEX_STANDARD_SETTINGS
    return ElasticsearchInterface(es).update_index_settings(
        index, INDEX_STANDARD_SETTINGS)
Example #17
 def __init__(self, elasticsearch, index_info, doc_prep_fn=None, doc_filter_fn=None, change_filter_fn=None):
     self.change_filter_fn = change_filter_fn or noop_filter
     self.doc_filter_fn = doc_filter_fn or noop_filter
     self.elasticsearch = elasticsearch
     self.es_interface = ElasticsearchInterface(self.elasticsearch)
     self.index_info = index_info
     self.doc_transform_fn = doc_prep_fn or identity
Example #18
def set_index_reindex_settings(es, index):
    """
    Set a more optimized setting setup for fast reindexing
    """
    from pillowtop.index_settings import INDEX_REINDEX_SETTINGS
    return ElasticsearchInterface(es).update_index_settings(
        index, INDEX_REINDEX_SETTINGS)
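
Examples #16 and #18 are typically paired around a bulk reindex: switch to the
reindex settings, load, then restore the standard settings. A hedged sketch of
that workflow; bulk_load_documents is a hypothetical stand-in for whatever
performs the actual reindex:

# Hedged workflow sketch; bulk_load_documents is hypothetical.
set_index_reindex_settings(es, index)     # optimize the index for bulk writes
try:
    bulk_load_documents(es, index)        # hypothetical bulk-load step
finally:
    set_index_normal_settings(es, index)  # restore normal indexing settings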
Example #19
    def setUp(self):
        self.index = TEST_ES_INFO.alias
        self.type = TEST_ES_INFO.type
        self.es = get_es_new()
        # tweak mapping
        self.mapping = {"properties": {"message": {"type": "string"}}}
        meta = {"mapping": self.mapping}
        # setup index
        if self.es.indices.exists(self.index):
            self.es.indices.delete(self.index)
        self.es.indices.create(index=self.index, body=meta)

        # insert a doc so we get some mapping data
        interface = ElasticsearchInterface(self.es)
        ident = uuid.uuid4().hex
        doc = {"message": "hello"}
        interface.index_doc(self.index, self.type, ident, doc)
        self.es.indices.refresh(self.index)
Example #20
def get_case_name(case_id):
    from corehq.pillows.mappings.case_mapping import CASE_INDEX_INFO
    try:
        result = ElasticsearchInterface(get_es_new()).get_doc(
            CASE_INDEX_INFO.alias,
            CASE_INDEX_INFO.type,
            case_id,
            source_includes=['name'])
    except ElasticsearchException:
        return None

    return result['name']
Example #21
 def test_assume_alias(self):
     initialize_index_and_mapping(self.es, TEST_INDEX_INFO)
     doc_id = uuid.uuid4().hex
     doc = {'_id': doc_id, 'doc_type': 'CommCareCase', 'type': 'mother'}
     ElasticsearchInterface(get_es_new()).index_doc(
         self.index, TEST_INDEX_INFO.type, doc_id, {'doc_type': 'CommCareCase', 'type': 'mother'},
         verify_alias=False)
     self.assertEqual(1, get_doc_count(self.es, self.index))
     assume_alias(self.es, self.index, TEST_INDEX_INFO.alias)
     es_doc = self.es_interface.get_doc(TEST_INDEX_INFO.alias, TEST_INDEX_INFO.type, doc_id)
     for prop in doc:
         self.assertEqual(doc[prop], es_doc[prop])
Example #22
def delete_case_search_cases(domain):
    if domain is None or isinstance(domain, dict):
        raise TypeError("Domain attribute is required")

    get_es_new().indices.refresh(CASE_SEARCH_INDEX)
    case_ids = CaseSearchES().domain(domain).values_list('_id', flat=True)

    ElasticsearchInterface(get_es_new()).bulk_ops([{
        "_op_type": "delete",
        "_index": CASE_SEARCH_INDEX,
        "_type": CASE_ES_TYPE,
        "_id": case_id,
    } for case_id in case_ids])
Example #23
    def process_bulk_docs(self, docs):
        if len(docs) == 0:
            return True

        pillow_logging.info("Processing batch of %s docs", len((docs)))

        changes = [self._doc_to_change(doc) for doc in docs]
        error_collector = ErrorCollector()

        bulk_changes = build_bulk_payload(self.index_info, changes,
                                          self.doc_transform, error_collector)

        for change, exception in error_collector.errors:
            pillow_logging.error("Error procesing doc %s: %s (%s)", change.id,
                                 type(exception), exception)

        es_interface = ElasticsearchInterface(self.es)
        try:
            es_interface.bulk_ops(bulk_changes)
        except Exception:
            pillow_logging.exception("\tException sending payload to ES")
            return False

        return True
Example #24
    def setUpClass(cls):
        super().setUpClass()
        cls.domain = uuid.uuid4().hex
        cls.case_ids = [uuid.uuid4().hex for i in range(4)]
        with drop_connected_signals(case_post_save), drop_connected_signals(
                sql_case_post_save):
            for case_id in cls.case_ids:
                create_form_for_test(cls.domain, case_id)

        cls.es = get_es_new()
        cls.es_interface = ElasticsearchInterface(cls.es)
        cls.index = TEST_INDEX_INFO.index

        with trap_extra_setup(ConnectionError):
            ensure_index_deleted(cls.index)
            initialize_index_and_mapping(cls.es, TEST_INDEX_INFO)
Example #25
def delete_case_search_cases(domain):
    if domain is None or isinstance(domain, dict):
        raise TypeError("Domain attribute is required")

    get_es_new().indices.refresh(CASE_SEARCH_INDEX)
    case_ids = CaseSearchES().domain(domain).values_list('_id', flat=True)

    op_kwargs = {
        "_op_type": "delete",
        "_index": CASE_SEARCH_INDEX_INFO.alias,
        "_type": CASE_ES_TYPE,
    }
    if settings.ELASTICSEARCH_MAJOR_VERSION == 7:
        op_kwargs.pop('_type')

    ElasticsearchInterface(get_es_new()).bulk_ops([{
        **op_kwargs,
        "_id": case_id,
    } for case_id in case_ids])
Example #26
class ElasticPillowTest(SimpleTestCase):
    def setUp(self):
        self.index = TEST_INDEX_INFO.index
        self.es_alias = TEST_INDEX_INFO.alias
        self.es = get_es_new()
        self.es_interface = ElasticsearchInterface(self.es)
        with trap_extra_setup(ConnectionError):
            ensure_index_deleted(self.index)

    def tearDown(self):
        ensure_index_deleted(self.index)

    def test_create_index(self):
        initialize_index_and_mapping(self.es, TEST_INDEX_INFO)
        # make sure it was created
        self.assertTrue(self.es.indices.exists(self.index))
        # check the subset of settings we expected to set
        settings_back = self.es.indices.get_settings(
            self.index)[self.index]['settings']
        self.assertEqual(
            TEST_INDEX_INFO.meta['settings']['analysis'],
            settings_back['index']['analysis'],
        )
        self.es.indices.delete(self.index)
        self.assertFalse(self.es.indices.exists(self.index))

    def test_mapping_initialization(self):
        initialize_index_and_mapping(self.es, TEST_INDEX_INFO)
        self.assertTrue(mapping_exists(self.es, TEST_INDEX_INFO))
        mapping = get_index_mapping(self.es, self.index, TEST_INDEX_INFO.type)
        # we can't compare the whole dicts because ES adds a bunch of stuff to them
        self.assertEqual(
            transform_for_es7(
                TEST_INDEX_INFO.mapping)['properties']['doc_type'],
            mapping['properties']['doc_type'])

    def test_refresh_index(self):
        initialize_index_and_mapping(self.es, TEST_INDEX_INFO)
        doc_id = uuid.uuid4().hex
        doc = {'_id': doc_id, 'doc_type': 'CommCareCase', 'type': 'mother'}
        self.assertEqual(0, get_doc_count(self.es, self.es_alias))
        self.es_interface.create_doc(self.es_alias, 'case', doc_id, doc)
        self.assertEqual(
            0, get_doc_count(self.es, self.es_alias, refresh_first=False))
        self.es.indices.refresh(self.index)
        self.assertEqual(
            1, get_doc_count(self.es, self.es_alias, refresh_first=False))

    def test_index_operations(self):
        initialize_index_and_mapping(self.es, TEST_INDEX_INFO)
        self.assertTrue(self.es.indices.exists(self.index))

        # delete and check
        self.es.indices.delete(self.index)
        self.assertFalse(self.es.indices.exists(self.index))

        # create and check
        initialize_index(self.es, TEST_INDEX_INFO)
        self.assertTrue(self.es.indices.exists(self.index))

    def test_assume_alias(self):
        initialize_index_and_mapping(self.es, TEST_INDEX_INFO)
        doc_id = uuid.uuid4().hex
        doc = {'_id': doc_id, 'doc_type': 'CommCareCase', 'type': 'mother'}
        ElasticsearchInterface(get_es_new()).create_doc(
            self.index, TEST_INDEX_INFO.type, doc_id, {
                'doc_type': 'CommCareCase',
                'type': 'mother'
            }, False)
        self.assertEqual(1, get_doc_count(self.es, self.index))
        assume_alias(self.es, self.index, TEST_INDEX_INFO.alias)
        es_doc = self.es_interface.get_doc(TEST_INDEX_INFO.alias,
                                           TEST_INDEX_INFO.type, doc_id)
        for prop in doc:
            self.assertEqual(doc[prop], es_doc[prop])

    def test_assume_alias_deletes_old_aliases(self):
        # create a different index and set the alias for it
        initialize_index_and_mapping(self.es, TEST_INDEX_INFO)
        new_index = 'test_index-with-duplicate-alias'
        if not self.es.indices.exists(new_index):
            self.es.indices.create(index=new_index)
        self.es.indices.put_alias(new_index, TEST_INDEX_INFO.alias)
        self.addCleanup(functools.partial(ensure_index_deleted, new_index))

        # make sure it's there in the other index
        aliases = self.es_interface.get_aliases()
        self.assertEqual([TEST_INDEX_INFO.alias],
                         list(aliases[new_index]['aliases']))

        # assume alias and make sure it's removed (and added to the right index)
        assume_alias(self.es, self.index, TEST_INDEX_INFO.alias)
        aliases = self.es_interface.get_aliases()

        self.assertEqual(0, len(aliases[new_index]['aliases']))
        self.assertEqual([TEST_INDEX_INFO.alias],
                         list(aliases[self.index]['aliases']))

    def test_update_settings(self):
        initialize_index_and_mapping(self.es, TEST_INDEX_INFO)
        self.es_interface.update_index_settings(self.index,
                                                INDEX_REINDEX_SETTINGS)
        index_settings_back = self.es.indices.get_settings(
            self.index)[self.index]['settings']
        self._compare_es_dicts(INDEX_REINDEX_SETTINGS, index_settings_back)
        self.es_interface.update_index_settings(self.index,
                                                INDEX_STANDARD_SETTINGS)
        index_settings_back = self.es.indices.get_settings(
            self.index)[self.index]['settings']
        self._compare_es_dicts(INDEX_STANDARD_SETTINGS, index_settings_back)

    def test_set_index_reindex(self):
        initialize_index_and_mapping(self.es, TEST_INDEX_INFO)
        set_index_reindex_settings(self.es, self.index)
        index_settings_back = self.es.indices.get_settings(
            self.index)[self.index]['settings']
        self._compare_es_dicts(INDEX_REINDEX_SETTINGS, index_settings_back)

    def test_set_index_normal(self):
        initialize_index_and_mapping(self.es, TEST_INDEX_INFO)
        set_index_normal_settings(self.es, self.index)
        index_settings_back = self.es.indices.get_settings(
            self.index)[self.index]['settings']
        self._compare_es_dicts(INDEX_STANDARD_SETTINGS, index_settings_back)

    def _compare_es_dicts(self, expected, returned):
        sub_returned = returned['index']
        should_not_exist = disallowed_settings_by_es_version[
            settings.ELASTICSEARCH_MAJOR_VERSION]
        for key, value in expected['index'].items():
            if key in should_not_exist:
                continue
            split_key = key.split('.')
            returned_value = sub_returned[split_key[0]]
            for sub_key in split_key[1:]:
                returned_value = returned_value[sub_key]
            self.assertEqual(str(value), returned_value)

        for disallowed_setting in should_not_exist:
            self.assertNotIn(
                disallowed_setting, sub_returned,
                '{} is disallowed and should not be in the index settings'.
                format(disallowed_setting))
Example #27
class TestSendToElasticsearch(SimpleTestCase):
    def setUp(self):
        self.es = get_es_new()
        self.es_interface = ElasticsearchInterface(self.es)
        self.index = TEST_INDEX_INFO.index
        self.es_alias = TEST_INDEX_INFO.alias

        with trap_extra_setup(ConnectionError):
            ensure_index_deleted(self.index)
            initialize_index_and_mapping(self.es, TEST_INDEX_INFO)

    def tearDown(self):
        ensure_index_deleted(self.index)

    @mock.patch('corehq.apps.hqadmin.views.data.ES_META', TEST_ES_META)
    @mock.patch('corehq.apps.es.es_query.ES_META', TEST_ES_META)
    @mock.patch('corehq.elastic.ES_META', TEST_ES_META)
    def test_create_doc(self):
        doc = {
            '_id': uuid.uuid4().hex,
            'doc_type': 'MyCoolDoc',
            'property': 'foo'
        }
        self._send_to_es_and_check(doc)
        res = lookup_doc_in_es(doc['_id'], self.index)
        self.assertEqual(res, doc)

    def _send_to_es_and_check(self,
                              doc,
                              update=False,
                              es_merge_update=False,
                              delete=False,
                              esgetter=None):
        if update and es_merge_update:
            old_doc = self.es_interface.get_doc(self.es_alias,
                                                TEST_INDEX_INFO.type,
                                                doc['_id'])

        send_to_elasticsearch(TEST_INDEX_INFO,
                              doc_type=TEST_INDEX_INFO.type,
                              doc_id=doc['_id'],
                              es_getter=esgetter or get_es_new,
                              name='test',
                              data=doc,
                              es_merge_update=es_merge_update,
                              delete=delete)

        if not delete:
            self.assertEqual(1, get_doc_count(self.es, self.index))
            es_doc = self.es_interface.get_doc(self.es_alias,
                                               TEST_INDEX_INFO.type,
                                               doc['_id'])
            if es_merge_update:
                old_doc.update(es_doc)
                for prop in doc:
                    self.assertEqual(doc[prop], old_doc[prop])
            else:
                for prop in doc:
                    self.assertEqual(doc[prop], es_doc[prop])
                self.assertTrue(all(prop in doc for prop in es_doc))
        else:
            self.assertEqual(0, get_doc_count(self.es, self.index))

    def test_update_doc(self):
        doc = {
            '_id': uuid.uuid4().hex,
            'doc_type': 'MyCoolDoc',
            'property': 'bar'
        }
        self._send_to_es_and_check(doc)

        doc['property'] = 'bazz'
        self._send_to_es_and_check(doc, update=True)

    def test_replace_doc(self):
        doc = {
            '_id': uuid.uuid4().hex,
            'doc_type': 'MyCoolDoc',
            'property': 'bar'
        }
        self._send_to_es_and_check(doc)

        del doc['property']
        self._send_to_es_and_check(doc, update=True)

    def test_merge_doc(self):
        doc = {
            '_id': uuid.uuid4().hex,
            'doc_type': 'MyCoolDoc',
            'property': 'bar'
        }
        self._send_to_es_and_check(doc)

        update = doc.copy()
        del update['property']
        update['new_prop'] = 'new_val'
        # merging should still keep old 'property'
        self._send_to_es_and_check(update, update=True, es_merge_update=True)

    def test_delete_doc(self):
        doc = {
            '_id': uuid.uuid4().hex,
            'doc_type': 'MyCoolDoc',
            'property': 'bar'
        }
        self._send_to_es_and_check(doc)

        self._send_to_es_and_check(doc, delete=True)

    def test_connection_failure(self):
        def _bad_es_getter():
            from corehq.util.es.elasticsearch import Elasticsearch
            return Elasticsearch(
                [{
                    'host': settings.ELASTICSEARCH_HOST,
                    'port': settings.ELASTICSEARCH_PORT - 2,  # bad port
                }],
                timeout=0.1,
            )

        doc = {
            '_id': uuid.uuid4().hex,
            'doc_type': 'MyCoolDoc',
            'property': 'bar'
        }

        with self.assertRaises(PillowtopIndexingError):
            self._send_to_es_and_check(doc, esgetter=_bad_es_getter)

    def test_not_found(self):
        doc = {
            '_id': uuid.uuid4().hex,
            'doc_type': 'MyCoolDoc',
            'property': 'bar'
        }

        self._send_to_es_and_check(doc, delete=True)

    def test_conflict(self):
        doc = {
            '_id': uuid.uuid4().hex,
            'doc_type': 'MyCoolDoc',
            'property': 'foo'
        }
        self._send_to_es_and_check(doc)

        # attempt to create the same doc twice shouldn't fail
        self._send_to_es_and_check(doc)
Example #28
def es_query(params=None,
             facets=None,
             terms=None,
             q=None,
             es_index=None,
             start_at=None,
             size=None,
             dict_only=False,
             fields=None,
             facet_size=None):
    if terms is None:
        terms = []
    if q is None:
        q = {}
    else:
        q = copy.deepcopy(q)
    if params is None:
        params = {}

    q["size"] = size if size is not None else q.get("size", SIZE_LIMIT)
    q["from"] = start_at or 0

    def get_or_init_anded_filter_from_query_dict(qdict):
        and_filter = qdict.get("filter", {}).pop("and", [])
        filter = qdict.pop("filter", None)
        if filter:
            and_filter.append(filter)
        return {"and": and_filter}

    filter = get_or_init_anded_filter_from_query_dict(q)

    def convert(param):
        #todo: find a better way to handle bools, something that won't break fields that may be 'T' or 'F' but not bool
        if param == 'T' or param is True:
            return 1
        elif param == 'F' or param is False:
            return 0
        return param

    for attr in params:
        if attr not in terms:
            attr_val = [convert(params[attr])] if not isinstance(
                params[attr], list) else [convert(p) for p in params[attr]]
            filter["and"].append({"terms": {attr: attr_val}})

    if facets:
        q["facets"] = q.get("facets", {})
        if isinstance(facets, list):
            for facet in facets:
                q["facets"][facet] = {
                    "terms": {
                        "field": facet,
                        "size": facet_size or SIZE_LIMIT
                    }
                }
        elif isinstance(facets, dict):
            q["facets"].update(facets)

    if filter["and"]:
        query = q.pop("query", {})
        q["query"] = {
            "filtered": {
                "filter": filter,
            }
        }
        q["query"]["filtered"]["query"] = query if query else {"match_all": {}}

    if fields is not None:
        q["fields"] = q.get("fields", [])
        q["fields"].extend(fields)

    if dict_only:
        return q

    es_index = es_index or 'domains'
    es_interface = ElasticsearchInterface(get_es_new())
    meta = ES_META[es_index]

    try:
        result = es_interface.search(meta.index, meta.type, body=q)
        report_and_fail_on_shard_failures(result)
    except ElasticsearchException as e:
        raise ESError(e)

    if fields is not None:
        for res in result['hits']['hits']:
            flatten_field_dict(res)

    return result
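
A hedged usage sketch for es_query, relying on the behavior above where params
entries become terms filters and facets become terms facets; all values are
illustrative only:

# Hedged usage sketch; all values below are illustrative.
result = es_query(
    params={'domain.exact': 'demo'},  # becomes a terms filter (see above)
    facets=['doc_type'],              # becomes a terms facet
    es_index='domains',
    size=10,
)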
Example #29
class ESView(View):
    """
    Generic CBV for interfacing with the Elasticsearch REST api.
    This is necessary because tastypie's built in REST assumptions don't like
    ES's POST for querying, which we can set explicitly here.

    For security purposes, queries ought to be scoped to the requesting user's
    domain, so adding a base_query is encouraged.

    The APIs can be accessed via url endpoints attached to corehq.api.urls,
    or programmatically via the self.run_query() method.

    This iteration of ESView therefore requires a domain for its usage.
    """
    #note - for security purposes, csrf protection is ENABLED
    #search POST queries must take the following format:
    #query={query_json}
    #csrfmiddlewaretoken=token

    #in curl, this is:
    #curl -b "csrftoken=<csrftoken>;sessionid=<session_id>" -H "Content-Type: application/json" -XPOST http://server/a/domain/api/v0.1/xform_es/
    #     -d"[email protected]&csrfmiddlewaretoken=<csrftoken>"
    #or, call this programmatically to avoid CSRF issues.

    es_alias = ""
    domain = ""
    es = None
    doc_type = None
    model = None

    http_method_names = ['get', 'post', 'head', ]

    def __init__(self, domain):
        super(ESView, self).__init__()
        self.domain = domain.lower()
        self.es = get_es_new()
        self.es_interface = ElasticsearchInterface(self.es)

    def head(self, *args, **kwargs):
        raise NotImplementedError("Not implemented")

    @method_decorator(login_and_domain_required)
    #@method_decorator(csrf_protect)
    # todo: csrf_protect temporarily removed and left to implementor's prerogative
    # getting ajax'ed csrf token method needs revisit.
    def dispatch(self, *args, **kwargs):
        req = args[0]
        self.pretty = req.GET.get('pretty', False)
        if self.pretty:
            self.indent = 4
        else:
            self.indent = None
        ret = super(ESView, self).dispatch(*args, **kwargs)
        return ret

    @classonlymethod
    def as_view(cls, **initkwargs):
        """
        Django as_view cannot be used since the constructor requires information only present in the request.
        """
        raise Exception('as_view not supported for domain-specific ESView')
        
    @classonlymethod
    def as_domain_specific_view(cls, **initkwargs):
        """
        Creates a simple domain-specific class-based view for passing through ES requests.
        """
        def view(request, domain, *args, **kwargs):
            self = cls(domain)
            return self.dispatch(request, domain, *args, **kwargs)

        return view

    def get_document(self, doc_id):
        try:
            doc = self.es_interface.get_doc(self.es_alias, '_all', doc_id)
        except NotFoundError:
            raise object_does_not_exist(self.doc_type, doc_id)

        if doc.get('domain') != self.domain:
            raise object_does_not_exist(self.doc_type, doc_id)

        return self.model(doc) if self.model else doc

    def run_query(self, es_query, es_type=None):
        """
        Run a more advanced POST based ES query

        Returns the raw query json back, or None if there's an error
        """

        logger.info("ESlog: [%s.%s] ESquery: %s" % (self.__class__.__name__, self.domain, json.dumps(es_query)))
        if 'fields' in es_query or 'script_fields' in es_query:
            #nasty hack to add domain field to query that does specific fields.
            #do nothing if there's no field query because we get everything
            fields = es_query.get('fields', [])
            fields.append('domain')
            es_query['fields'] = fields

        try:
            es_results = self.es_interface.search(self.es_alias, es_type, body=es_query)
            report_and_fail_on_shard_failures(es_results)
        except ElasticsearchException as e:
            if 'query_string' in es_query.get('query', {}).get('filtered', {}).get('query', {}):
                # the error may have been caused by a bad query string
                # re-run with no query string to check
                querystring = es_query['query']['filtered']['query']['query_string']['query']
                new_query = es_query
                new_query['query']['filtered']['query'] = {"match_all": {}}
                new_results = self.run_query(new_query)
                if new_results:
                    # the request succeeded without that query string
                    # an error with a blank query will return None
                    raise ESUserError("Error with elasticsearch query: %s" %
                        querystring)

            msg = "Error in elasticsearch query [%s]: %s\nquery: %s" % (self.es_alias, str(e), es_query)
            raise ESError(msg)

        hits = []
        for res in es_results['hits']['hits']:
            res_domain = None  # guard against hits with neither _source nor fields
            if '_source' in res:
                res_domain = res['_source'].get('domain', None)
            elif 'fields' in res:
                res['fields'] = flatten_field_dict(res)
                res_domain = res['fields'].get('domain', None)

            # security check
            if res_domain == self.domain:
                hits.append(res)
            else:
                logger.info("Requester domain %s does not match result domain %s" % (
                    self.domain, res_domain))
        es_results['hits']['hits'] = hits
        return es_results
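
The class docstring above mentions calling run_query() programmatically. A
hedged sketch of that path; XFormESView is a hypothetical ESView subclass
(as_view() is deliberately disabled, so the class is constructed directly
with a domain), and the alias, doc_type and domain are illustrative:

# Hedged usage sketch; XFormESView and its attributes are hypothetical.
class XFormESView(ESView):
    es_alias = 'xforms'
    doc_type = 'XFormInstance'

view = XFormESView('my-domain')
results = view.run_query({"query": {"match_all": {}}, "size": 10})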
Example #30
 def __init__(self, domain):
     super(ESView, self).__init__()
     self.domain = domain.lower()
     self.es = get_es_new()
     self.es_interface = ElasticsearchInterface(self.es)