def list_records(
    client,
    request,
    query,
    identifier,
    page_number,
    sort_by,
    search_params,
    list_redirect_target,
    reset_url,
    uuid,
):
    """Render a paginated, sorted listing of collection records.

    :param client: backend client exposing ``find_collections`` and
        ``count_collections``.
    :param request: Django HTTP request.
    :param query: search pattern passed through to the client.
    :param identifier: identifier filter passed through to the client.
    :param page_number: 1-indexed page to display.
    :param sort_by: sort key; also used to derive ``sort_direction``.
    :param search_params: extra URL params for paging links (used by the
        template via ``locals()``).
    :param list_redirect_target: template name to render.
    :param reset_url: URL for the reset link (used by the template via
        ``locals()``).
    :param uuid: record UUID (used by the template via ``locals()``).
    :return: rendered HTTP response.
    """
    # Fetch pages lazily from the backend rather than loading the whole
    # result set; total length comes from a separate count call.
    resources = LazyPagedSequence(
        lambda page, page_size: client.find_collections(
            search_pattern=query,
            identifier=identifier,
            page=page,
            page_size=page_size,
            sort_by=sort_by,
        ),
        PAGE_SIZE,
        client.count_collections(query, identifier),
    )
    page = helpers.pager(resources, PAGE_SIZE, page_number)

    sort_direction = _determine_reverse_sort_direction(sort_by)

    # NOTE: locals() is deliberately used as the template context, so every
    # local name above — including the otherwise-unused parameters
    # search_params, reset_url and uuid — is exposed to the template.
    return render(request, list_redirect_target, locals())
# --- Example #2 ---
def search(request):
    """Search the 'aips' index and render paginated archival-storage results.

    Two modes, toggled by the ``filemode`` GET checkbox:

    * AIP mode (default): searches ``aipfile`` documents, aggregates the
      distinct parent-AIP UUIDs, then runs a second query against ``aip``
      documents for display.
    * File mode: lists matching ``aipfile`` documents directly.

    Redirects back to itself with empty search parameters when no ``query``
    GET parameter is present.  Returns a plain error response on
    Elasticsearch failures.
    """
    # FIXME there has to be a better way of handling checkboxes than parsing
    # them by hand here, and displaying 'checked' in
    # _archival_storage_search_form.html
    # Parse checkbox for file mode
    yes_options = ('checked', 'yes', 'true', 'on')
    if request.GET.get('filemode', '') in yes_options:
        file_mode = True
        checked_if_in_file_mode = 'checked'
        items_per_page = 20
    else:  # AIP list
        file_mode = False
        checked_if_in_file_mode = ''
        items_per_page = 10

    # Parse checkbox for show AICs
    show_aics = ''
    if request.GET.get('show_aics', '') in yes_options:
        show_aics = 'checked'

    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)
    logger.debug('Queries: %s, Ops: %s, Fields: %s, Types: %s', queries, ops, fields, types)

    # redirect if no search params have been set
    if 'query' not in request.GET:
        return helpers.redirect_with_get_params(
            'components.archival_storage.views.search',
            query='',
            field='',
            type=''
        )

    # get string of URL parameters that should be passed along when paging
    search_params = advanced_search.extract_url_search_params_from_request(request)

    current_page_number = int(request.GET.get('page', 1))

    # perform search
    es_client = elasticSearchFunctions.get_client()
    results = None
    query = advanced_search.assemble_query(es_client, queries, ops, fields, types, search_index='aips', doc_type='aipfile')
    try:
        # use all results to pull transfer facets if not in file mode
        # pulling only one field (we don't need field data as we augment
        # the results using separate queries)
        if not file_mode:
            # Fetch all unique AIP UUIDs in the returned set of files
            # NOTE: 'size': 0 in a terms aggregation means "all buckets" in
            # legacy Elasticsearch (pre-5.x) — TODO confirm against the ES
            # version this code targets.
            query['aggs'] = {'aip_uuids': {'terms': {'field': 'AIPUUID', 'size': 0}}}
            # Don't return results, just the aggregation
            query['size'] = 0
            # Searching for AIPs still actually searches type 'aipfile', and
            # returns the UUID of the AIP the files are a part of.  To search
            # for an attribute of an AIP, the aipfile must index that
            # information about their AIP in
            # elasticSearchFunctions.index_mets_file_metadata
            results = es_client.search(
                body=query,
                index='aips',
                doc_type='aipfile',
                sort='sipName:desc',
            )
            # Given these AIP UUIDs, now fetch the actual information we want from aips/aip
            buckets = results['aggregations']['aip_uuids']['buckets']
            uuids = [bucket['key'] for bucket in buckets]
            # Per-AIP file counts, passed to search_augment_aip_results below.
            uuid_file_counts = {bucket['key']: bucket['doc_count'] for bucket in buckets}
            # Replace the aipfile query with a second-stage query against the
            # 'aip' doc type for the AIPs found above.
            query = {
                'query': {
                    'terms': {
                        'uuid': uuids,
                    },
                },
            }
            index = 'aips'
            doc_type = 'aip'
            fields = 'name,uuid,size,created,status,AICID,isPartOf,countAIPsinAIC,encrypted'
            sort = 'name:desc'
        else:
            index = 'aips'
            doc_type = 'aipfile'
            fields = 'AIPUUID,filePath,FILEUUID,encrypted'
            sort = 'sipName:desc'

        # To reduce amount of data fetched from ES, use LazyPagedSequence
        def es_pager(page, page_size):
            """
            Fetch one page of normalized aipfile entries from Elasticsearch.

            :param page: 1-indexed page to fetch
            :param page_size: Number of entries on a page
            :return: List of dicts for each entry with additional information
            """
            # Closure over query/index/doc_type/fields/sort/uuid_file_counts
            # bound above, depending on file_mode.
            start = (page - 1) * page_size
            results = es_client.search(
                body=query,
                from_=start,
                size=page_size,
                index=index,
                doc_type=doc_type,
                fields=fields,
                sort=sort,
            )
            if file_mode:
                return search_augment_file_results(es_client, results)
            else:
                return search_augment_aip_results(results, uuid_file_counts)
        count = es_client.count(index=index, doc_type=doc_type, body={'query': query['query']})['count']
        results = LazyPagedSequence(es_pager, items_per_page, count)

    except ElasticsearchException:
        logger.exception('Error accessing index.')
        return HttpResponse('Error accessing index.')

    if not file_mode:
        # 'uuids' was bound in the AIP branch above; prime the AIC creation
        # form with the full set of matching AIP UUIDs.
        aic_creation_form = forms.CreateAICForm(initial={'results': uuids})
    else:  # if file_mode
        aic_creation_form = None

    page_data = helpers.pager(results, items_per_page, current_page_number)

    return render(request, 'archival_storage/search.html',
                  {
                      'file_mode': file_mode,
                      'show_aics': show_aics,
                      'checked_if_in_file_mode': checked_if_in_file_mode,
                      'aic_creation_form': aic_creation_form,
                      'results': page_data.object_list,
                      'search_params': search_params,
                      'page': page_data,
                  }
                  )
# --- Example #3 ---
def list_display(request):
    """Render a sorted, paginated list of stored AIPs.

    Pulls AIP documents from the 'aips' Elasticsearch index, reconciles any
    AIPs flagged as deleted / pending deletion against the storage service
    (deleting or re-marking their ES records as a side effect), then formats
    each surviving AIP for display.
    """

    if 'aips' not in settings.SEARCH_ENABLED:
        return render(request, 'archival_storage/list.html')
    current_page_number = int(request.GET.get('page', 1))
    logger.debug('Current page: %s', current_page_number)

    # get count of AIP files
    es_client = elasticSearchFunctions.get_client()
    aip_indexed_file_count = aip_file_count(es_client)

    # get AIPs
    order_by = request.GET.get('order_by', 'name_unanalyzed')
    sort_by = request.GET.get('sort_by', 'up')

    # Map the UI's up/down toggle onto ES sort directions.
    if sort_by == 'down':
        sort_direction = 'desc'
    else:
        sort_direction = 'asc'

    sort_specification = order_by + ':' + sort_direction
    # Query string fragment re-appended to paging links by the template.
    sort_params = 'order_by=' + order_by + '&sort_by=' + sort_by

    # get list of UUIDs of AIPs that are deleted or pending deletion
    aips_deleted_or_pending_deletion = []
    should_haves = [
        {'match': {'status': 'DEL_REQ'}},
        {'match': {'status': 'DELETED'}},
    ]
    query = {
        "query": {
            "bool": {
                "should": should_haves
            }
        }
    }
    deleted_aip_results = es_client.search(
        body=query,
        index='aips',
        doc_type='aip',
        fields='uuid,status'
    )
    for deleted_aip in deleted_aip_results['hits']['hits']:
        # ES 'fields' responses wrap each value in a single-element list.
        aips_deleted_or_pending_deletion.append(deleted_aip['fields']['uuid'][0])

    # Fetch results and paginate
    def es_pager(page, page_size):
        """
        Fetch one page of normalized entries from Elasticsearch.

        :param page: 1-indexed page to fetch
        :param page_size: Number of entries on a page
        :return: List of dicts for each entry, where keys and values have been cleaned up
        """
        start = (page - 1) * page_size
        results = es_client.search(
            index='aips',
            doc_type='aip',
            body=elasticSearchFunctions.MATCH_ALL_QUERY,
            fields='origin,uuid,filePath,created,name,size,encrypted',
            sort=sort_specification,
            size=page_size,
            from_=start,
        )
        # normalize results - each of the fields contains a single value,
        # but is returned from the ES API as a single-length array
        # e.g. {"fields": {"uuid": ["abcd"], "name": ["aip"] ...}}
        return [elasticSearchFunctions.normalize_results_dict(d) for d in results['hits']['hits']]

    items_per_page = 10
    count = es_client.count(index='aips', doc_type='aip', body=elasticSearchFunctions.MATCH_ALL_QUERY)['count']
    results = LazyPagedSequence(es_pager, page_size=items_per_page, length=count)

    # Paginate
    page = helpers.pager(
        results,
        items_per_page,
        current_page_number
    )

    # process deletion, etc., and format results
    aips = []
    for aip in page.object_list:
        # If an AIP was deleted or is pending deletion, react if status changed
        if aip['uuid'] in aips_deleted_or_pending_deletion:
            # check with storage server to see current status
            api_results = storage_service.get_file_info(uuid=aip['uuid'])
            try:
                aip_status = api_results[0]['status']
            except IndexError:
                # Storage service does not know about this AIP
                # TODO what should happen here?
                logger.info("AIP not found in storage service: {}".format(aip))
                continue

            # delete AIP metadata in ElasticSearch if AIP has been deleted from the
            # storage server
            # TODO: handle this asynchronously
            if aip_status == 'DELETED':
                elasticSearchFunctions.delete_aip(es_client, aip['uuid'])
                elasticSearchFunctions.delete_aip_files(es_client, aip['uuid'])
            elif aip_status != 'DEL_REQ':
                # update the status in ElasticSearch for this AIP
                elasticSearchFunctions.mark_aip_stored(es_client, aip['uuid'])
        else:
            aip_status = 'UPLOADED'

        # Tweak AIP presentation and add to display array
        if aip_status != 'DELETED':
            aip['status'] = AIP_STATUS_DESCRIPTIONS[aip_status]

            # Size may be missing/non-numeric for removed content.
            try:
                size = '{0:.2f} MB'.format(float(aip['size']))
            except (TypeError, ValueError):
                size = 'Removed'

            aip['size'] = size

            aip['href'] = aip['filePath'].replace(AIPSTOREPATH + '/', "AIPsStore/")
            aip['date'] = aip['created']

            aips.append(aip)

    total_size = total_size_of_aips(es_client)
    # Find out which AIPs are encrypted

    return render(request, 'archival_storage/list.html',
                  {
                      'total_size': total_size,
                      'aip_indexed_file_count': aip_indexed_file_count,
                      'aips': aips,
                      'page': page,
                      'search_params': sort_params,
                  }
                  )
# --- Example #4 ---
def search(request):
    """Search the AIP indices and render paginated archival-storage results.

    Two modes, toggled by the ``filemode`` GET checkbox:

    * AIP mode (default): searches the ``aipfiles`` index, aggregates the
      distinct parent-AIP UUIDs, then runs a second query against the
      ``aips`` index for display.
    * File mode: lists matching documents from ``aipfiles`` directly.

    Redirects back to itself with empty search parameters when no ``query``
    GET parameter is present.  Returns a plain error response on
    Elasticsearch failures.
    """
    # FIXME there has to be a better way of handling checkboxes than parsing
    # them by hand here, and displaying 'checked' in
    # _archival_storage_search_form.html
    # Parse checkbox for file mode
    yes_options = ("checked", "yes", "true", "on")
    if request.GET.get("filemode", "") in yes_options:
        file_mode = True
        checked_if_in_file_mode = "checked"
        items_per_page = 20
    else:  # AIP list
        file_mode = False
        checked_if_in_file_mode = ""
        items_per_page = 10

    # Parse checkbox for show AICs
    show_aics = ""
    if request.GET.get("show_aics", "") in yes_options:
        show_aics = "checked"

    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(
        request)
    logger.debug("Queries: %s, Ops: %s, Fields: %s, Types: %s", queries, ops,
                 fields, types)

    # redirect if no search params have been set
    if "query" not in request.GET:
        return helpers.redirect_with_get_params(
            "components.archival_storage.views.search",
            query="",
            field="",
            type="")

    # get string of URL parameters that should be passed along when paging
    search_params = advanced_search.extract_url_search_params_from_request(
        request)

    current_page_number = int(request.GET.get("page", 1))

    # perform search
    es_client = elasticSearchFunctions.get_client()
    results = None
    query = advanced_search.assemble_query(queries, ops, fields, types)
    try:
        # Use all results to pull transfer facets if not in file mode
        # pulling only one field (we don't need field data as we augment
        # the results using separate queries).
        if not file_mode:
            # Fetch all unique AIP UUIDs in the returned set of files
            # ES query will limit to 10 aggregation results by default,
            # add size parameter in terms to override.
            # TODO: Use composite aggregation when it gets out of beta.
            query["aggs"] = {
                "aip_uuids": {
                    "terms": {
                        "field": "AIPUUID",
                        "size": "10000"
                    }
                }
            }
            # Don't return results, just the aggregation
            query["size"] = 0
            # Searching for AIPs still actually searches type 'aipfile', and
            # returns the UUID of the AIP the files are a part of.  To search
            # for an attribute of an AIP, the aipfile must index that
            # information about their AIP.
            results = es_client.search(body=query, index="aipfiles")
            # Given these AIP UUIDs, now fetch the actual information we want from aips/aip
            buckets = results["aggregations"]["aip_uuids"]["buckets"]
            uuids = [bucket["key"] for bucket in buckets]
            # Per-AIP file counts, passed to search_augment_aip_results below.
            uuid_file_counts = {
                bucket["key"]: bucket["doc_count"]
                for bucket in buckets
            }
            # Replace the aipfiles query with a second-stage query against
            # the 'aips' index for the AIPs found above.
            query = {"query": {"terms": {"uuid": uuids}}}
            index = "aips"
            fields = (
                "name,uuid,size,created,status,AICID,isPartOf,countAIPsinAIC,encrypted"
            )
            # Sort on the keyword subfield so ordering is lexicographic.
            sort = "name.raw:desc"
        else:
            index = "aipfiles"
            fields = "AIPUUID,filePath,FILEUUID,encrypted"
            sort = "sipName.raw:desc"

        # To reduce amount of data fetched from ES, use LazyPagedSequence
        def es_pager(page, page_size):
            """
            Fetch one page of normalized aipfile entries from Elasticsearch.

            :param page: 1-indexed page to fetch
            :param page_size: Number of entries on a page
            :return: List of dicts for each entry with additional information
            """
            # Closure over query/index/fields/sort/uuid_file_counts bound
            # above, depending on file_mode.
            start = (page - 1) * page_size
            results = es_client.search(
                body=query,
                from_=start,
                size=page_size,
                index=index,
                _source=fields,
                sort=sort,
            )
            if file_mode:
                return search_augment_file_results(es_client, results)
            else:
                return search_augment_aip_results(results, uuid_file_counts)

        count = es_client.count(index=index, body={"query":
                                                   query["query"]})["count"]
        results = LazyPagedSequence(es_pager, items_per_page, count)

    except ElasticsearchException:
        logger.exception("Error accessing index.")
        return HttpResponse("Error accessing index.")

    if not file_mode:
        # 'uuids' was bound in the AIP branch above; prime the AIC creation
        # form with the full set of matching AIP UUIDs.
        aic_creation_form = forms.CreateAICForm(initial={"results": uuids})
    else:  # if file_mode
        aic_creation_form = None

    page_data = helpers.pager(results, items_per_page, current_page_number)

    return render(
        request,
        "archival_storage/search.html",
        {
            "file_mode": file_mode,
            "show_aics": show_aics,
            "checked_if_in_file_mode": checked_if_in_file_mode,
            "aic_creation_form": aic_creation_form,
            "results": page_data.object_list,
            "search_params": search_params,
            "page": page_data,
        },
    )
# --- Example #5 ---
def list_display(request):
    """Render a sorted, paginated list of stored AIPs.

    Pulls AIP documents from the 'aips' Elasticsearch index, reconciles any
    AIPs flagged as deleted / pending deletion against the storage service
    (deleting or re-marking their ES records as a side effect), then formats
    each surviving AIP for display.
    """

    if "aips" not in settings.SEARCH_ENABLED:
        return render(request, "archival_storage/list.html")
    current_page_number = int(request.GET.get("page", 1))
    logger.debug("Current page: %s", current_page_number)

    # get count of AIP files
    es_client = elasticSearchFunctions.get_client()
    aip_indexed_file_count = aip_file_count(es_client)

    # get AIPs
    order_by = request.GET.get("order_by", "name")
    sort_by = request.GET.get("sort_by", "up")

    # Query string fragment re-appended to paging links by the template;
    # built before order_by/sort_by are rewritten for ES below.
    sort_params = "order_by=" + order_by + "&sort_by=" + sort_by

    # use raw subfield to sort by name
    if order_by == "name":
        order_by = order_by + ".raw"

    # change sort_by param to ES sort directions
    if sort_by == "down":
        sort_by = "desc"
    else:
        sort_by = "asc"

    sort_specification = order_by + ":" + sort_by

    # get list of UUIDs of AIPs that are deleted or pending deletion
    aips_deleted_or_pending_deletion = []
    should_haves = [{
        "match": {
            "status": "DEL_REQ"
        }
    }, {
        "match": {
            "status": "DELETED"
        }
    }]
    query = {"query": {"bool": {"should": should_haves}}}
    deleted_aip_results = es_client.search(body=query,
                                           index="aips",
                                           _source="uuid,status")
    for deleted_aip in deleted_aip_results["hits"]["hits"]:
        aips_deleted_or_pending_deletion.append(deleted_aip["_source"]["uuid"])

    # Fetch results and paginate
    def es_pager(page, page_size):
        """
        Fetch one page of normalized entries from Elasticsearch.

        :param page: 1-indexed page to fetch
        :param page_size: Number of entries on a page
        :return: List of dicts for each entry, where keys and values have been cleaned up
        """
        start = (page - 1) * page_size
        results = es_client.search(
            index="aips",
            body={"query": {
                "match_all": {}
            }},
            _source="origin,uuid,filePath,created,name,size,encrypted",
            sort=sort_specification,
            size=page_size,
            from_=start,
        )
        return [d["_source"] for d in results["hits"]["hits"]]

    items_per_page = 10
    count = es_client.count(index="aips", body={"query": {
        "match_all": {}
    }})["count"]
    results = LazyPagedSequence(es_pager,
                                page_size=items_per_page,
                                length=count)

    # Paginate
    page = helpers.pager(results, items_per_page, current_page_number)

    # process deletion, etc., and format results
    aips = []
    for aip in page.object_list:
        # If an AIP was deleted or is pending deletion, react if status changed
        if aip["uuid"] in aips_deleted_or_pending_deletion:
            # check with storage server to see current status
            api_results = storage_service.get_file_info(uuid=aip["uuid"])
            try:
                aip_status = api_results[0]["status"]
            except IndexError:
                # Storage service does not know about this AIP
                # TODO what should happen here?
                logger.info("AIP not found in storage service: {}".format(aip))
                continue

            # delete AIP metadata in ElasticSearch if AIP has been deleted from the
            # storage server
            # TODO: handle this asynchronously
            if aip_status == "DELETED":
                elasticSearchFunctions.delete_aip(es_client, aip["uuid"])
                elasticSearchFunctions.delete_aip_files(es_client, aip["uuid"])
            elif aip_status != "DEL_REQ":
                # update the status in ElasticSearch for this AIP
                elasticSearchFunctions.mark_aip_stored(es_client, aip["uuid"])
        else:
            aip_status = "UPLOADED"

        # Tweak AIP presentation and add to display array
        if aip_status != "DELETED":
            aip["status"] = AIP_STATUS_DESCRIPTIONS[aip_status]

            # Size may be missing/non-numeric for removed content.
            try:
                size = "{0:.2f} MB".format(float(aip["size"]))
            except (TypeError, ValueError):
                size = "Removed"

            aip["size"] = size

            aip["href"] = aip["filePath"].replace(AIPSTOREPATH + "/",
                                                  "AIPsStore/")
            aip["date"] = aip["created"]

            aips.append(aip)

    total_size = total_size_of_aips(es_client)
    # Find out which AIPs are encrypted

    return render(
        request,
        "archival_storage/list.html",
        {
            "total_size": total_size,
            "aip_indexed_file_count": aip_indexed_file_count,
            "aips": aips,
            "page": page,
            "search_params": sort_params,
        },
    )