def status_for_index(index_name):
    """Report the status of an index.

    Asks ``es`` for the index's stats and for the index itself (each request
    is timed separately) and hands both results to ``convert_es_status``.

    Returns ``(converted status, 200)`` on success. If either request raises
    ``TransportError`` the result is ``(error message, error status code)``.
    """
    try:
        with logged_duration_for_external_request('es'):
            index_stats = es.indices.stats(index=index_name, human=True)
        with logged_duration_for_external_request('es'):
            index_info = es.indices.get(index_name)
    except TransportError as error:
        return _get_an_error_message(error), error.status_code
    else:
        return convert_es_status(index_name, index_stats, index_info), 200
def delete_index(index_name):
    """Delete the named index.

    Returns ``("acknowledged", 200)`` once the delete request completes, or
    ``(error message, error status code)`` if it raises ``TransportError``.
    """
    try:
        with logged_duration_for_external_request('es'):
            es.indices.delete(index=index_name)
    except TransportError as error:
        return _get_an_error_message(error), error.status_code
    else:
        return "acknowledged", 200
def delete_by_id(index_name, doc_type, document_id):
    """Delete a single document from an index by its id.

    ``doc_type` is accepted as part of the signature but is not used by this
    function.

    Returns ``(delete response, 200)`` on success, or
    ``(error message, error status code)`` if the request raises
    ``TransportError``.
    """
    try:
        with logged_duration_for_external_request('es'):
            delete_response = es.delete(index=index_name, id=document_id)
    except TransportError as error:
        return _get_an_error_message(error), error.status_code
    else:
        return delete_response, 200
def get_mapping(index_name, document_type):
    """Load the mapping for an index and check it matches the given document type.

    Returns a ``Mapping`` built from the mapping data with
    ``mapping_type=document_type``.

    Raises ``MappingNotFound`` when the lookup fails with a
    ``type_missing_exception``, when the response contains no mapping at all,
    or when the ``doc_type`` recorded in the mapping's ``_meta`` differs from
    ``document_type``. Any other ``NotFoundError`` is re-raised unchanged.
    """
    try:
        # The response of es.indices.get_mapping is keyed by the real index name even when we reached the index
        # through an alias, so take the single value it holds instead of looking it up by index_name.
        with logged_duration_for_external_request('es'):
            mappings_by_index = es.indices.get_mapping(index=index_name)
            mapping_data = next(iter(mappings_by_index.values()))
    except NotFoundError as e:
        if e.error != "type_missing_exception":
            raise
        raise MappingNotFound("Document type '{}' is not valid in index '{}' - no mapping found.".format(
            document_type, index_name))
    except StopIteration:
        # The response was empty, so there was no value to take.
        raise MappingNotFound("Document type '{}' is not valid in index '{}' - no mapping found.".format(
            document_type, index_name))

    # Mapping types are being removed in ES 7, which makes document types irrelevant to ES itself.
    # Our API still carries them in URLs though, and expects a 400 when the wrong document type
    # is given, so compare against the doc_type stored in the mapping's metadata.
    declared_doc_type = mapping_data["mappings"]["_meta"]["doc_type"]
    if declared_doc_type != document_type:
        raise MappingNotFound(
            f"Document type '{document_type}' is not valid in index '{index_name}' - not returning mapping."
        )

    return Mapping(mapping_data, mapping_type=document_type)
def scan():
    """Scan the uploaded ``document`` file with ClamAV and report the outcome as JSON.

    Aborts with 400 when the request has no file named ``document``. If the
    ClamAV client raises ``ClamdError`` the error is logged and the request
    aborts with 500.

    On success returns ``(json response, 200)`` where the JSON has
    ``infectionFound`` (false only when the first element of the scan result
    is ``'OK'``) and ``details`` (the remaining elements of the scan result).
    """
    if 'document' not in request.files:
        abort(
            400,
            'The file for scanning should be uploaded under the name `document`.'
        )

    upload = request.files['document']

    try:
        clamav = clamd.ClamdUnixSocket()

        with logged_duration_for_external_request(
                service='ClamAV', description='instream scan via unix socket'):
            payload = BytesIO(upload.stream.read())
            verdict = clamav.instream(payload)['stream']

    except clamd.ClamdError as e:
        current_app.logger.error(f'Unable to scan file: {str(e)}')
        return abort(500)

    is_clean = verdict[0] == 'OK'

    return jsonify({
        'infectionFound': not is_clean,
        'details': verdict[1:],
    }), 200
def create_index(index_name, mapping_name):
    """Create an index from a named mapping definition.

    The mapping definition is loaded with
    ``app.mapping.load_mapping_definition(mapping_name)`` and sent as the body
    of the create request.

    Returns ``("acknowledged", 200)`` on success. If the create request raises
    ``TransportError`` a warning is logged and the result is
    ``(error message, error status code)``.
    """
    mapping_definition = app.mapping.load_mapping_definition(mapping_name)
    try:
        with logged_duration_for_external_request('es'):
            es.indices.create(index=index_name, body=mapping_definition)
    except TransportError as e:
        error_message = _get_an_error_message(e)
        # Log the name of the index we were asked to create (`index_name`), not the
        # module-level `index` function, which would render as a function repr.
        current_app.logger.warning(
            "Failed to create the index %s: %s",
            index_name, error_message
        )
        return error_message, e.status_code

    return "acknowledged", 200
def index(index_name, doc_type, document, document_id):
    """Index a document under the given id.

    ``doc_type`` is accepted as part of the signature but is not used by this
    function.

    Returns ``("acknowledged", 200)`` on success. If the request raises
    ``TransportError`` the failure is logged as an error and the result is
    ``(error message, error status code)``.
    """
    try:
        with logged_duration_for_external_request('es'):
            es.index(index=index_name, id=document_id, body=document)
    except TransportError as error:
        current_app.logger.error(
            "Failed to index the document %s: %s",
            document_id, _get_an_error_message(error)
        )
        return _get_an_error_message(error), error.status_code
    else:
        return "acknowledged", 200
def create_alias(alias_name, target_index):
    """Point an alias at the given index.

    Both actions are sent in one ``update_aliases`` request: the alias is
    first removed from every index (``_all``) and then added to
    ``target_index``.

    Returns ``("acknowledged", 200)`` on success, or
    ``(error message, error status code)`` if the request raises
    ``TransportError``.
    """
    detach_everywhere = {"remove": {"index": "_all", "alias": alias_name}}
    attach_to_target = {"add": {"index": target_index, "alias": alias_name}}

    try:
        with logged_duration_for_external_request('es'):
            es.indices.update_aliases({"actions": [detach_everywhere, attach_to_target]})
    except TransportError as error:
        return _get_an_error_message(error), error.status_code
    else:
        return "acknowledged", 200
def core_search_and_aggregate(index_name, doc_type, query_args, search=False, aggregations=None):
    """Run a search and/or aggregation query against an index and build the API response.

    The mapping is looked up with ``app.mapping.get_mapping``. The page size
    comes from the ``DM_SEARCH_PAGE_SIZE`` config value, multiplied by
    ``DM_ID_ONLY_SEARCH_PAGE_SIZE_MULTIPLIER`` when ``'idOnly'`` is present in
    ``query_args``.

    :param index_name: index (or alias) to query
    :param doc_type: document type used for the mapping lookup and pagination URLs
    :param query_args: query arguments passed to ``construct_query``
    :param search: when true, the query is sent with
        ``search_type='dfs_query_then_fetch'`` and paging past the end of the
        results produces a 404 response
    :param aggregations: aggregations passed to ``construct_query``; when truthy
        the response also carries an ``aggregations`` key. ``None`` (the default)
        is treated as an empty list.
    :return: one of
        - ``(response dict, 200)`` with ``meta``, ``documents``, ``links`` and
          optionally ``aggregations``
        - the result of ``_page_404_response`` when the requested page is past
          the end of the results
        - ``(error message, error status code)`` when a ``TransportError`` is raised
        - ``(str(error), 400)`` when a ``ValueError`` is raised
    """
    # A None default avoids sharing a single mutable list object between calls.
    if aggregations is None:
        aggregations = []

    try:
        mapping = app.mapping.get_mapping(index_name, doc_type)
        page_size = int(current_app.config['DM_SEARCH_PAGE_SIZE'])
        if 'idOnly' in query_args:
            page_size *= int(current_app.config['DM_ID_ONLY_SEARCH_PAGE_SIZE_MULTIPLIER'])

        es_search_kwargs = {'search_type': 'dfs_query_then_fetch'} if search else {}
        constructed_query = construct_query(mapping, query_args, aggregations, page_size)
        with logged_duration_for_external_request('es'):
            res = es.search(
                index=index_name, body=constructed_query, track_total_hits=True, **es_search_kwargs
            )

        results = convert_es_results(mapping, res, query_args)

        def url_for_search(**kwargs):
            return url_for('.search', index_name=index_name, doc_type=doc_type, _external=True, **kwargs)

        response = {
            "meta": results['meta'],
            "documents": results['documents'],
            "links": generate_pagination_links(
                query_args, results['meta']['total'],
                page_size, url_for_search
            ),
        }

        if aggregations:
            # Return aggregations in a slightly cleaner format.
            response['aggregations'] = {
                k: {d['key']: d['doc_count'] for d in v['buckets']}
                for k, v in res.get('aggregations', {}).items()
            }

        # determine whether we're actually off the end of the results. ES handles this as a result-less-yet-happy
        # response, but we probably want to turn it into a 404 not least so we can match our behaviour when fetching
        # beyond the `max_result_window` below
        if search and constructed_query.get("from") and not response["documents"]:
            return _page_404_response(query_args.get("page", None))

        return response, 200

    except TransportError as e:
        # Pull out the first root cause's reason defensively: `info` may be missing or oddly shaped, and a root
        # cause may carry no "reason". Anything unexpected simply means we report the original error below.
        try:
            root_causes = getattr(e, "info", {}).get("error", {}).get("root_cause", {})
            reason = root_causes[0].get("reason") if root_causes else None
        except (AttributeError, KeyError, IndexError, TypeError):
            reason = None

        if isinstance(reason, str) and reason.startswith("Result window is too large"):
            # in this case we have to fire off another request to determine how we should handle this error...
            # (note minor race condition possible if index is modified between the original call and this one)
            try:
                body = construct_query(mapping, query_args, page_size=None)
                with logged_duration_for_external_request('es'):
                    result_count = es.count(
                        index=index_name,
                        body=body
                    )["count"]
            except TransportError as count_error:
                # Named separately so the original error `e` is not shadowed.
                return _get_an_error_message(count_error), count_error.status_code

            if result_count < constructed_query.get("from", 0):
                # there genuinely aren't enough results for this number of pages, so this should be a 404
                return _page_404_response(query_args.get("page", None))
            # else fall through and allow this to 500 - we probably don't have max_result_window set high enough
            # for the number of results it's possible to access using this index.
        return _get_an_error_message(e), e.status_code

    except ValueError as e:
        return str(e), 400