Example 1
def transfer_backlog(request):
    # deal with transfer mode
    file_mode = False
    checked_if_in_file_mode = ''
    if request.GET.get('mode', '') != '':
        file_mode = True
        checked_if_in_file_mode = 'checked'

    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(
        request)

    # redirect if no search params have been set
    if 'query' not in request.GET:
        return helpers.redirect_with_get_params(
            'components.ingest.views.transfer_backlog',
            query='',
            field='',
            type='')

    # get string of URL parameters that should be passed along when paging
    search_params = advanced_search.extract_url_search_params_from_request(
        request)

    # set paging variables
    if not file_mode:
        items_per_page = 10
    else:
        items_per_page = 20

    page = advanced_search.extract_page_number_from_url(request)

    start = page * items_per_page + 1
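    # Worked example: `page` is 0-indexed, so with items_per_page=10 and
    # page=2, start = 2 * 10 + 1 = 21 -- a 1-indexed position for display;
    # the Elasticsearch offset passed below is start - 1 = 20.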

    # perform search
    conn = elasticSearchFunctions.connect_and_create_index('transfers')

    try:
        query = advanced_search.assemble_query(
            queries,
            ops,
            fields,
            types,
            must_haves=[pyes.TermQuery('status', 'backlog')])

        # use all results to pull transfer facets if not in file mode
        if not file_mode:
            results = conn.search_raw(
                query,
                indices='transfers',
                type='transferfile',
            )
        else:
            # otherwise use paged results
            results = conn.search_raw(query,
                                      indices='transfers',
                                      type='transferfile',
                                      start=start - 1,
                                      size=items_per_page)
    except Exception:
        return HttpResponse('Error accessing index.')

    # take note of facet data
    file_extension_usage = results['facets']['fileExtension']['terms']
    transfer_uuids = results['facets']['sipuuid']['terms']

    if not file_mode:
        # run through transfers to see if they've been created yet
        awaiting_creation = {}
        for transfer_instance in transfer_uuids:
            try:
                awaiting_creation[transfer_instance.term] = (
                    transfer_awaiting_sip_creation_v2(transfer_instance.term))
                transfer = models.Transfer.objects.get(
                    uuid=transfer_instance.term)
                transfer_basename = os.path.basename(
                    transfer.currentlocation[:-1])
                transfer_instance.name = transfer_basename[:-37]
                transfer_instance.type = transfer.type
                if transfer.accessionid is not None:
                    transfer_instance.accession = transfer.accessionid
                else:
                    transfer_instance.accession = ''
            except Exception:
                awaiting_creation[transfer_instance.term] = False

        # page data
        number_of_results = len(transfer_uuids)
        page_data = helpers.pager(transfer_uuids, items_per_page, page + 1)
        transfer_uuids = page_data['objects']
    else:
        # page data
        number_of_results = results.hits.total
        results = transfer_backlog_augment_search_results(results)

    # set remaining paging variables
    end, previous_page, next_page = advanced_search.paging_related_values_for_template_use(
        items_per_page, page, start, number_of_results)

    # make sure results is set
    try:
        if results:
            pass
    except Exception:
        results = False

    form = StorageSearchForm(initial={'query': queries[0]})
    return render(request, 'ingest/backlog/search.html', locals())
Example 2
def transfer_backlog(request, ui):
    """
    AJAX endpoint to query for and return transfer backlog items.
    """
    es_client = elasticSearchFunctions.get_client()
    results = None

    # Return files which are in the backlog
    backlog_filter = {
        'bool': {
            'must': {
                'term': {
                    'status': 'backlog',
                }
            }
        }
    }
    # Omit files without UUIDs (metadata and logs directories):
    # - When the `hidemetadatalogs` param is sent from SIP arrange.
    # - Always from the appraisal tab.
    if ui == 'appraisal' or request.GET.get('hidemetadatalogs'):
        backlog_filter['bool']['must_not'] = {
            'term': {
                'fileuuid': '',
            }
        }
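        # With the must_not clause applied, backlog_filter is equivalent to:
        # {'bool': {'must': {'term': {'status': 'backlog'}},
        #           'must_not': {'term': {'fileuuid': ''}}}}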

    # Get search parameters from request
    if 'query' not in request.GET:
        # Use backlog boolean filter as boolean query
        query = {'query': backlog_filter}
    else:
        queries, ops, fields, types = advanced_search.search_parameter_prep(
            request)

        try:
            query = advanced_search.assemble_query(
                queries,
                ops,
                fields,
                types,
                filters=[backlog_filter],
            )
        except Exception:
            logger.exception('Error accessing index.')
            return HttpResponse('Error accessing index.')

    # perform search
    try:
        results = elasticSearchFunctions.search_all_results(
            es_client,
            body=query,
            index='transferfiles',
        )
    except Exception:
        logger.exception('Error accessing index.')
        return HttpResponse('Error accessing index.')

    # Convert results into a more workable form
    results = elasticSearchFunctions.augment_raw_search_results(results)

    # Convert to a form JS can use:
    # [{'name': <filename>,
    #   'properties': {'not_draggable': False}},
    #  {'name': <directory name>,
    #   'properties': {'not_draggable': True, 'object count': 3, 'display_string': '3 objects'},
    #   'children': [
    #    {'name': <filename>,
    #     'properties': {'not_draggable': True}},
    #    {'name': <directory name>,
    #     'children': [...]
    #    }
    #   ]
    #  },
    # ]
    return_list = []
    directory_map = {}
    # _es_results_to_directory_tree requires that paths MUST be sorted
    results.sort(key=lambda x: x['relative_path'])
    for path in results:
        # If a path is in SIPArrange.original_path, then it shouldn't be draggable
        not_draggable = False
        if models.SIPArrange.objects.filter(
                original_path__endswith=path['relative_path']).exists():
            not_draggable = True
        if ui == 'legacy':
            _es_results_to_directory_tree(path['relative_path'],
                                          return_list,
                                          not_draggable=not_draggable)
        else:
            _es_results_to_appraisal_tab_format(path,
                                                directory_map,
                                                return_list,
                                                not_draggable=not_draggable)

    if ui == 'legacy':
        response = return_list
    else:
        response = {
            'formats': [],  # TODO populate this
            'transfers': return_list,
        }

    # return JSON response
    return helpers.json_response(response)
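
For reference, a minimal sketch of walking the tree structure documented in the comment above (a hypothetical helper, not part of this module):

def iter_tree(nodes, depth=0):
    """Yield (depth, name, draggable) for every node in a backlog tree."""
    for node in nodes:
        properties = node.get('properties', {})
        yield depth, node['name'], not properties.get('not_draggable', False)
        # Directory entries carry a 'children' list; files do not
        yield from iter_tree(node.get('children', []), depth + 1)
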
Example 3
def search(request):
    # deal with transfer mode
    file_mode = False
    checked_if_in_file_mode = ''
    if request.GET.get('mode', '') != '':
        file_mode = True
        checked_if_in_file_mode = 'checked'

    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(
        request)

    # redirect if no search params have been set
    if 'query' not in request.GET:
        return helpers.redirect_with_get_params(
            'components.archival_storage.views.search',
            query='',
            field='',
            type='')

    # get string of URL parameters that should be passed along when paging
    search_params = advanced_search.extract_url_search_params_from_request(
        request)

    # set paging variables
    if not file_mode:
        items_per_page = 2
    else:
        items_per_page = 20

    page = advanced_search.extract_page_number_from_url(request)

    start = page * items_per_page + 1

    # perform search
    conn = pyes.ES(elasticSearchFunctions.getElasticsearchServerHostAndPort())

    try:
        query = advanced_search.assemble_query(queries, ops, fields, types)

        # use all results to pull transfer facets if not in file mode
        # pulling only one field (we don't need field data as we augment
        # the results using separate queries)
        if not file_mode:
            results = conn.search_raw(query=query,
                                      indices='aips',
                                      type='aipfile',
                                      fields='uuid')
        else:
            results = conn.search_raw(query=query,
                                      indices='aips',
                                      type='aipfile',
                                      start=start - 1,
                                      size=items_per_page,
                                      fields='AIPUUID,filePath,FILEUUID')
    except Exception:
        return HttpResponse('Error accessing index.')

    # take note of facet data
    aip_uuids = results['facets']['AIPUUID']['terms']

    if not file_mode:
        number_of_results = len(aip_uuids)

        page_data = helpers.pager(aip_uuids, items_per_page, page + 1)
        aip_uuids = page_data['objects']
        search_augment_aip_results(conn, aip_uuids)
    else:
        number_of_results = results.hits.total
        results = search_augment_file_results(results)

    # set remaining paging variables
    end, previous_page, next_page = advanced_search.paging_related_values_for_template_use(
        items_per_page, page, start, number_of_results)

    # make sure results is set
    try:
        if results:
            pass
    except Exception:
        results = False

    form = forms.StorageSearchForm(initial={'query': queries[0]})
    return render(request, 'archival_storage/archival_storage_search.html',
                  locals())
Example 4
def transfer_backlog(request):
    """
    AJAX endpoint to query for and return transfer backlog items.
    """
    # Get search parameters from request
    results = None
    conn = elasticSearchFunctions.connect_and_create_index('transfers')

    if 'query' not in request.GET:
        query = elasticSearchFunctions.MATCH_ALL_QUERY
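        # i.e. roughly {'query': {'match_all': {}}}; the exact constant is
        # defined in elasticSearchFunctions.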
    else:
        queries, ops, fields, types = advanced_search.search_parameter_prep(request)

        try:
            query = advanced_search.assemble_query(
                queries,
                ops,
                fields,
                types,
                # Specify this as a filter, not a must_have, for performance,
                # and so that it doesn't cause the "should" queries in a
                # should-only query to be ignored.
                filters={'term': {'status': 'backlog'}},
            )
        except Exception:
            logger.exception('Error accessing index.')
            return HttpResponse('Error accessing index.')

    # perform search
    try:
        results = elasticSearchFunctions.search_all_results(
            conn,
            body=query,
            index='transfers',
            doc_type='transferfile',
        )
    except Exception:
        logger.exception('Error accessing index.')
        return HttpResponse('Error accessing index.')

    # Convert results into a more workable form
    results = _transfer_backlog_augment_search_results(results)

    # Convert to a form JS can use:
    # [{'name': <filename>,
    #   'properties': {'not_draggable': False}},
    #  {'name': <directory name>,
    #   'properties': {'not_draggable': True, 'object count': 3, 'display_string': '3 objects'},
    #   'children': [
    #    {'name': <filename>,
    #     'properties': {'not_draggable': True}},
    #    {'name': <directory name>,
    #     'children': [...]
    #    }
    #   ]
    #  },
    # ]
    return_list = []
    # _es_results_to_directory_tree requires that paths MUST be sorted
    results.sort(key=lambda x: x['relative_path'])
    for path in results:
        # If a path is in SIPArrange.original_path, then it shouldn't be draggable
        not_draggable = False
        if models.SIPArrange.objects.filter(
            original_path__endswith=path['relative_path']).exists():
            not_draggable = True
        _es_results_to_directory_tree(path['relative_path'], return_list, not_draggable=not_draggable)

    # return JSON response
    return helpers.json_response(return_list)
Example 5
def transfer_backlog(request, ui):
    """
    AJAX endpoint to query for and return transfer backlog items.
    """
    es_client = elasticSearchFunctions.get_client()
    results = None

    # Return files which are in the backlog
    backlog_filter = {"bool": {"must": {"term": {"status": "backlog"}}}}
    # Omit files without UUIDs (metadata and logs directories):
    # - When the `hidemetadatalogs` param is sent from SIP arrange.
    if request.GET.get("hidemetadatalogs"):
        backlog_filter["bool"]["must_not"] = {"term": {"fileuuid": ""}}

    # Get search parameters from request
    if "query" not in request.GET:
        # Use backlog boolean filter as boolean query
        query = {"query": backlog_filter}
    else:
        queries, ops, fields, types = advanced_search.search_parameter_prep(
            request)

        try:
            query = advanced_search.assemble_query(queries,
                                                   ops,
                                                   fields,
                                                   types,
                                                   filters=[backlog_filter])
        except Exception:
            logger.exception("Error accessing index.")
            return HttpResponse("Error accessing index.")

    # perform search
    try:
        results = elasticSearchFunctions.search_all_results(
            es_client, body=query, index="transferfiles")
    except Exception:
        logger.exception("Error accessing index.")
        return HttpResponse("Error accessing index.")

    # Convert results into a more workable form
    results = elasticSearchFunctions.augment_raw_search_results(results)

    # Convert to a form JS can use:
    # [{'name': <filename>,
    #   'properties': {'not_draggable': False}},
    #  {'name': <directory name>,
    #   'properties': {'not_draggable': True, 'object count': 3, 'display_string': '3 objects'},
    #   'children': [
    #    {'name': <filename>,
    #     'properties': {'not_draggable': True}},
    #    {'name': <directory name>,
    #     'children': [...]
    #    }
    #   ]
    #  },
    # ]
    return_list = []
    directory_map = {}
    # _es_results_to_directory_tree requires that paths MUST be sorted
    results.sort(key=lambda x: x["relative_path"])
    for path in results:
        # If a path is in SIPArrange.original_path, then it shouldn't be draggable
        not_draggable = False
        if models.SIPArrange.objects.filter(
                original_path__endswith=path["relative_path"]).exists():
            not_draggable = True
        if ui == "legacy":
            _es_results_to_directory_tree(path["relative_path"],
                                          return_list,
                                          not_draggable=not_draggable)
        else:
            _es_results_to_appraisal_tab_format(path,
                                                directory_map,
                                                return_list,
                                                not_draggable=not_draggable)

    if ui == "legacy":
        response = return_list
    else:
        if not request.GET.get("hidemetadatalogs"):
            # If metadata and log files are shown in the appraisal tab,
            # directories should not be draggable when they contain
            # non-draggable children.
            adjust_non_draggable_nodes(return_list)
        response = {
            "formats": [],  # TODO populate this
            "transfers": return_list,
        }

    # return JSON response
    return helpers.json_response(response)
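
adjust_non_draggable_nodes is not shown above; a plausible sketch of the bottom-up propagation it performs, inferred from the comment (the real implementation may differ):

def adjust_non_draggable_nodes_sketch(nodes):
    """Mark a directory not draggable if any descendant is not draggable."""
    any_not_draggable = False
    for node in nodes:
        properties = node.setdefault("properties", {})
        # Recurse first so a directory sees its children's final state
        if adjust_non_draggable_nodes_sketch(node.get("children", [])):
            properties["not_draggable"] = True
        any_not_draggable = any_not_draggable or properties.get("not_draggable", False)
    return any_not_draggable
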
Example 6
def transfer_backlog(request):
    # deal with transfer mode
    file_mode = False
    checked_if_in_file_mode = ''
    if request.GET.get('mode', '') != '':
        file_mode = True
        checked_if_in_file_mode = 'checked'

    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)

    # redirect if no search params have been set
    if 'query' not in request.GET:
        return helpers.redirect_with_get_params(
            'components.ingest.views.transfer_backlog',
            query='',
            field='',
            type=''
        )

    # get string of URL parameters that should be passed along when paging
    search_params = advanced_search.extract_url_search_params_from_request(request)

    # set paging variables
    if not file_mode:
        items_per_page = 10
    else:
        items_per_page = 20

    page = advanced_search.extract_page_number_from_url(request)

    start = page * items_per_page + 1

    # perform search
    conn = elasticSearchFunctions.connect_and_create_index('transfers')

    try:
        query = advanced_search.assemble_query(
            queries,
            ops,
            fields,
            types,
            must_haves=[pyes.TermQuery('status', 'backlog')]
        )

        # use all results to pull transfer facets if not in file mode
        if not file_mode:
            results = conn.search_raw(
                query,
                indices='transfers',
                type='transferfile',
            )
        else:
            # otherwise use paged results
            results = conn.search_raw(
                query,
                indices='transfers',
                type='transferfile',
                start=start - 1,
                size=items_per_page
            )
    except Exception:
        return HttpResponse('Error accessing index.')

    # take note of facet data
    file_extension_usage = results['facets']['fileExtension']['terms']
    transfer_uuids = results['facets']['sipuuid']['terms']

    if not file_mode:
        # run through transfers to see if they've been created yet
        awaiting_creation = {}
        for transfer_instance in transfer_uuids:
            try:
                awaiting_creation[transfer_instance.term] = transfer_awaiting_sip_creation_v2(transfer_instance.term)
                transfer = models.Transfer.objects.get(uuid=transfer_instance.term)
                transfer_basename = os.path.basename(transfer.currentlocation[:-1])
                transfer_instance.name = transfer_basename[:-37]
                transfer_instance.type = transfer.type
                if transfer.accessionid is not None:
                    transfer_instance.accession = transfer.accessionid
                else:
                    transfer_instance.accession = ''
            except Exception:
                awaiting_creation[transfer_instance.term] = False

        # page data
        number_of_results = len(transfer_uuids)
        page_data = helpers.pager(transfer_uuids, items_per_page, page + 1)
        transfer_uuids = page_data['objects']
    else:
        # page data
        number_of_results = results.hits.total
        results = transfer_backlog_augment_search_results(results)

    # set remaining paging variables
    end, previous_page, next_page = advanced_search.paging_related_values_for_template_use(
        items_per_page,
        page,
        start,
        number_of_results,
    )

    # make sure results is set
    try:
        if results:
            pass
    except Exception:
        results = False

    form = StorageSearchForm(initial={'query': queries[0]})
    return render(request, 'ingest/backlog/search.html', locals())
Example 7
def search(request):
    """A JSON end point that returns results for AIPs and their files.

    :param request: Django request object.
    :return: A JSON object including required metadata for the datatable and
    the search results.
    """
    REQUEST_FILE = "requestFile"
    MIMETYPE = "mimeType"
    RETURN_ALL = "returnAll"
    FILE_NAME = "fileName"

    request_file = request.GET.get(REQUEST_FILE, "").lower() == "true"
    file_mime = request.GET.get(MIMETYPE, "")
    file_name = request.GET.get(FILE_NAME, "")

    # Configure page-size requirements for the search.
    DEFAULT_PAGE_SIZE = 10
    page_size = None
    if request.GET.get(RETURN_ALL, "").lower() == "true":
        page_size = es.MAX_QUERY_SIZE
    if page_size is None:
        page_size = int(request.GET.get("iDisplayLength", DEFAULT_PAGE_SIZE))

    # Get search parameters from the request.
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)

    if "query" not in request.GET:
        queries, ops, fields, types = (["*"], ["or"], [""], ["term"])
    query = advanced_search.assemble_query(queries, ops, fields, types)
    file_mode = request.GET.get("file_mode") == "true"

    # Configure other aspects of the search including starting page and sort
    # order.
    start = int(request.GET.get("iDisplayStart", 0))
    order_by = get_es_property_from_column_index(
        int(request.GET.get("iSortCol_0", 0)), file_mode
    )
    sort_direction = request.GET.get("sSortDir_0", "asc")

    es_client = es.get_client()
    try:
        if file_mode:
            index = es.AIP_FILES_INDEX
            source = "filePath,FILEUUID,AIPUUID,accessionid,status"
        else:
            # Fetch all unique AIP UUIDs in the returned set of files.
            # ES query will limit to 10 aggregation results by default;
            # add size parameter in terms to override.
            # TODO: Use composite aggregation when it gets out of beta.
            query["aggs"] = {
                "aip_uuids": {"terms": {"field": "AIPUUID", "size": "10000"}}
            }
            # Don't return results, just the aggregation.
            query["size"] = 0
            # Searching for AIPs still actually searches type 'aipfile', and
            # returns the UUID of the AIP the files are a part of. To search
            # for an attribute of an AIP, the aipfile must index that
            # information about their AIP.
            results = es_client.search(body=query, index=es.AIP_FILES_INDEX)
            # Given these AIP UUIDs, now fetch the actual information we want
            # from AIPs/AIP.
            buckets = results["aggregations"]["aip_uuids"]["buckets"]
            uuids = [bucket["key"] for bucket in buckets]
            uuid_file_counts = {
                bucket["key"]: bucket["doc_count"] for bucket in buckets
            }
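            # Illustrative aggregation round-trip: the request body carries
            # {"size": 0, "aggs": {"aip_uuids": {"terms": {"field": "AIPUUID", "size": "10000"}}}}
            # and the response contains buckets such as
            # {"key": "<AIP UUID>", "doc_count": 12} -- one per unique AIP.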
            query = {"query": {"terms": {"uuid": uuids}}}
            index = es.AIPS_INDEX
            source = "name,uuid,size,accessionids,created,status,encrypted,AICID,isPartOf,countAIPsinAIC,location"

        results = es_client.search(
            index=index,
            body=query,
            from_=start,
            size=page_size,
            sort=order_by + ":" + sort_direction if order_by else "",
            _source=source,
        )

        if file_mode:
            augmented_results = search_augment_file_results(es_client, results)
        else:
            augmented_results = search_augment_aip_results(results, uuid_file_counts)

        if request_file and not file_mode:
            return search_as_file(
                augmented_results, file_name=file_name, mime_type=file_mime
            )

        hit_count = results["hits"]["total"]

        return helpers.json_response(
            {
                "iTotalRecords": hit_count,
                "iTotalDisplayRecords": hit_count,
                "sEcho": int(
                    request.GET.get("sEcho", 0)
                ),  # It was recommended we convert sEcho to int to prevent XSS.
                "aaData": augmented_results,
            }
        )

    except ElasticsearchException:
        err_desc = "Error accessing AIPs index"
        logger.exception(err_desc)
        return HttpResponse(err_desc)
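
The iDisplayStart/iDisplayLength/sEcho parameters above follow the legacy DataTables 1.9 server-side protocol. An illustrative request and response shape (hypothetical endpoint and values):

# GET <search endpoint>?query=report&field=&type=term
#     &iDisplayStart=20&iDisplayLength=10&iSortCol_0=1&sSortDir_0=desc&sEcho=3
# ->
# {"iTotalRecords": 134, "iTotalDisplayRecords": 134, "sEcho": 3,
#  "aaData": [...augmented AIP or file rows...]}
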
Example 8
def search(request):
    """
    A JSON end point that returns results for various backlog transfers and their files.

    :param request: The Django request object
    :return: A JSON object including required metadata for the datatable and the backlog search results.
    """
    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)

    file_mode = request.GET.get("file_mode") == "true"
    page_size = int(request.GET.get("iDisplayLength", 10))
    start = int(request.GET.get("iDisplayStart", 0))

    order_by = get_es_property_from_column_index(
        int(request.GET.get("iSortCol_0", 0)), file_mode
    )
    sort_direction = request.GET.get("sSortDir_0", "asc")

    es_client = es.get_client()

    if "query" not in request.GET:
        queries, ops, fields, types = (["*"], ["or"], [""], ["term"])

    query = advanced_search.assemble_query(
        queries, ops, fields, types, filters=[{"term": {"status": "backlog"}}]
    )

    try:
        if file_mode:
            index = es.TRANSFER_FILES_INDEX
            source = "filename,sipuuid,relative_path,accessionid,pending_deletion"
        else:
            # Transfer mode:
            # Query to transferfile, but only fetch & aggregate transfer UUIDs.
            # Based on transfer UUIDs, query to transfers.
            # ES query will limit to 10 aggregation results by default,
            # add size parameter in terms to override.
            # TODO: Use composite aggregation when it gets out of beta.
            query["aggs"] = {
                "transfer_uuid": {"terms": {"field": "sipuuid", "size": "10000"}}
            }
            hits = es_client.search(
                index=es.TRANSFER_FILES_INDEX,
                body=query,
                size=0,  # Don't return results, only aggregation
            )
            uuids = [x["key"] for x in hits["aggregations"]["transfer_uuid"]["buckets"]]

            # Recreate query to search over transfers
            query = {"query": {"terms": {"uuid": uuids}}}
            index = es.TRANSFERS_INDEX
            source = (
                "name,uuid,file_count,ingest_date,accessionid,size,pending_deletion"
            )

        hits = es_client.search(
            index=index,
            body=query,
            from_=start,
            size=page_size,
            sort=order_by + ":" + sort_direction if order_by else "",
            _source=source,
        )
        hit_count = hits["hits"]["total"]

    except Exception:
        err_desc = "Error accessing transfers index"
        logger.exception(err_desc)
        return HttpResponse(err_desc)

    search_results = []

    es_results = [x["_source"] for x in hits["hits"]["hits"]]

    for result in es_results:
        # Format size
        size = result.get("size")
        if size is not None:
            result["size"] = filesizeformat(size)

        if file_mode:
            # We only check status against the Storage Service for
            # transfers, so include all files in search results.
            search_results.append(result)
        else:
            pending_deletion = result.get("pending_deletion")
            keep_in_results = sync_es_transfer_status_with_storage_service(
                result["uuid"], pending_deletion
            )
            # Only return details for transfers that haven't been
            # deleted from the Storage Service in the search results.
            if keep_in_results:
                search_results.append(result)

    return helpers.json_response(
        {
            "iTotalRecords": hit_count,
            "iTotalDisplayRecords": hit_count,
            "sEcho": int(
                request.GET.get("sEcho", 0)
            ),  # It was recommended we convert sEcho to int to prevent XSS
            "aaData": search_results,
        }
    )
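
For reference, Django's filesizeformat (used above to format the size column) renders byte counts for display:

from django.template.defaultfilters import filesizeformat

filesizeformat(1024)     # '1.0 KB'
filesizeformat(1048576)  # '1.0 MB' (newer Django versions use a non-breaking space)
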
Example 9
def search(request):
    # FIXME there has to be a better way of handling checkboxes than parsing
    # them by hand here, and displaying 'checked' in
    # _archival_storage_search_form.html
    # Parse checkbox for file mode
    yes_options = ('checked', 'yes', 'true', 'on')
    if request.GET.get('filemode', '') in yes_options:
        file_mode = True
        checked_if_in_file_mode = 'checked'
        items_per_page = 20
    else:  # AIP list
        file_mode = False
        checked_if_in_file_mode = ''
        items_per_page = 10

    # Parse checkbox for show AICs
    show_aics = ''
    if request.GET.get('show_aics', '') in yes_options:
        show_aics = 'checked'

    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)
    logger.debug('Queries: %s, Ops: %s, Fields: %s, Types: %s', queries, ops, fields, types)

    # redirect if no search params have been set
    if 'query' not in request.GET:
        return helpers.redirect_with_get_params(
            'components.archival_storage.views.search',
            query='',
            field='',
            type=''
        )

    # get string of URL parameters that should be passed along when paging
    search_params = advanced_search.extract_url_search_params_from_request(request)

    current_page_number = int(request.GET.get('page', 1))

    # perform search
    es_client = elasticSearchFunctions.get_client()
    results = None
    query = advanced_search.assemble_query(es_client, queries, ops, fields, types, search_index='aips', doc_type='aipfile')
    try:
        # use all results to pull transfer facets if not in file mode
        # pulling only one field (we don't need field data as we augment
        # the results using separate queries)
        if not file_mode:
            # Fetch all unique AIP UUIDs in the returned set of files
            query['aggs'] = {'aip_uuids': {'terms': {'field': 'AIPUUID', 'size': 0}}}
            # Don't return results, just the aggregation
            query['size'] = 0
            # Searching for AIPs still actually searches type 'aipfile', and
            # returns the UUID of the AIP the files are a part of.  To search
            # for an attribute of an AIP, the aipfile must index that
            # information about their AIP in
            # elasticSearchFunctions.index_mets_file_metadata
            results = es_client.search(
                body=query,
                index='aips',
                doc_type='aipfile',
                sort='sipName:desc',
            )
            # Given these AIP UUIDs, now fetch the actual information we want from aips/aip
            buckets = results['aggregations']['aip_uuids']['buckets']
            uuids = [bucket['key'] for bucket in buckets]
            uuid_file_counts = {bucket['key']: bucket['doc_count'] for bucket in buckets}
            query = {
                'query': {
                    'terms': {
                        'uuid': uuids,
                    },
                },
            }
            index = 'aips'
            doc_type = 'aip'
            fields = 'name,uuid,size,created,status,AICID,isPartOf,countAIPsinAIC,encrypted'
            sort = 'name:desc'
        else:
            index = 'aips'
            doc_type = 'aipfile'
            fields = 'AIPUUID,filePath,FILEUUID,encrypted'
            sort = 'sipName:desc'

        # To reduce amount of data fetched from ES, use LazyPagedSequence
        def es_pager(page, page_size):
            """
            Fetch one page of normalized aipfile entries from Elasticsearch.

            :param page: 1-indexed page to fetch
            :param page_size: Number of entries on a page
            :return: List of dicts for each entry with additional information
            """
            start = (page - 1) * page_size
            results = es_client.search(
                body=query,
                from_=start,
                size=page_size,
                index=index,
                doc_type=doc_type,
                fields=fields,
                sort=sort,
            )
            if file_mode:
                return search_augment_file_results(es_client, results)
            else:
                return search_augment_aip_results(results, uuid_file_counts)
        count = es_client.count(index=index, doc_type=doc_type, body={'query': query['query']})['count']
        results = LazyPagedSequence(es_pager, items_per_page, count)

    except ElasticsearchException:
        logger.exception('Error accessing index.')
        return HttpResponse('Error accessing index.')

    if not file_mode:
        aic_creation_form = forms.CreateAICForm(initial={'results': uuids})
    else:  # if file_mode
        aic_creation_form = None

    page_data = helpers.pager(results, items_per_page, current_page_number)

    return render(request, 'archival_storage/search.html',
                  {
                      'file_mode': file_mode,
                      'show_aics': show_aics,
                      'checked_if_in_file_mode': checked_if_in_file_mode,
                      'aic_creation_form': aic_creation_form,
                      'results': page_data.object_list,
                      'search_params': search_params,
                      'page': page_data,
                  }
                  )
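
LazyPagedSequence is not shown above; a minimal sketch of the idea (hypothetical, assuming a sequence facade that fetches one page of results on demand, so helpers.pager only triggers queries for the pages it touches):

class LazyPagedSequenceSketch(object):
    """Sequence that calls `fetch(page, page_size)` only for pages accessed."""

    def __init__(self, fetch, page_size, length):
        self._fetch = fetch  # callable(page, page_size) -> list of items
        self._page_size = page_size
        self._length = length
        self._pages = {}  # page number -> fetched items

    def __len__(self):
        return self._length

    def __getitem__(self, index):
        if isinstance(index, slice):
            return [self[i] for i in range(*index.indices(self._length))]
        page, offset = divmod(index, self._page_size)
        if page not in self._pages:
            # es_pager above takes a 1-indexed page number
            self._pages[page] = self._fetch(page + 1, self._page_size)
        return self._pages[page][offset]
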
Example 10
def search(request):
    # deal with transfer mode
    file_mode = False
    checked_if_in_file_mode = ''
    if request.GET.get('mode', '') != '':
        file_mode = True
        checked_if_in_file_mode = 'checked'

    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)

    # redirect if no search params have been set
    if 'query' not in request.GET:
        return helpers.redirect_with_get_params(
            'components.archival_storage.views.search',
            query='',
            field='',
            type=''
        )

    # get string of URL parameters that should be passed along when paging
    search_params = advanced_search.extract_url_search_params_from_request(request)

    # set paging variables
    if not file_mode:
        items_per_page = 2
    else:
        items_per_page = 20

    page = advanced_search.extract_page_number_from_url(request)

    start = page * items_per_page + 1

    # perform search
    conn = pyes.ES(elasticSearchFunctions.getElasticsearchServerHostAndPort())

    try:
        query = advanced_search.assemble_query(queries, ops, fields, types)

        # use all results to pull transfer facets if not in file mode
        # pulling only one field (we don't need field data as we augment
        # the results using separate queries)
        if not file_mode:
            results = conn.search_raw(
                query=query,
                indices='aips',
                type='aipfile',
                fields='uuid'
            )
        else:
            results = conn.search_raw(
                query=query,
                indices='aips',
                type='aipfile',
                start=start - 1,
                size=items_per_page,
                fields='AIPUUID,filePath,FILEUUID'
            )
    except Exception:
        return HttpResponse('Error accessing index.')

    # take note of facet data
    aip_uuids = results['facets']['AIPUUID']['terms']

    if not file_mode:
        number_of_results = len(aip_uuids)

        page_data = helpers.pager(aip_uuids, items_per_page, page + 1)
        aip_uuids = page_data['objects']
        search_augment_aip_results(conn, aip_uuids)
    else:
        number_of_results = results.hits.total
        results = search_augment_file_results(results)

    # set remaining paging variables
    end, previous_page, next_page = advanced_search.paging_related_values_for_template_use(
        items_per_page,
        page,
        start,
        number_of_results,
    )

    # make sure results is set
    try:
        if results:
            pass
    except Exception:
        results = False

    form = forms.StorageSearchForm(initial={'query': queries[0]})
    return render(request, 'archival_storage/archival_storage_search.html', locals())
Example 11
def search(request):
    # FIXME there has to be a better way of handling checkboxes than parsing
    # them by hand here, and displaying 'checked' in
    # _archival_storage_search_form.html
    # Parse checkbox for file mode
    yes_options = ("checked", "yes", "true", "on")
    if request.GET.get("filemode", "") in yes_options:
        file_mode = True
        checked_if_in_file_mode = "checked"
        items_per_page = 20
    else:  # AIP list
        file_mode = False
        checked_if_in_file_mode = ""
        items_per_page = 10

    # Parse checkbox for show AICs
    show_aics = ""
    if request.GET.get("show_aics", "") in yes_options:
        show_aics = "checked"

    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(
        request)
    logger.debug("Queries: %s, Ops: %s, Fields: %s, Types: %s", queries, ops,
                 fields, types)

    # redirect if no search params have been set
    if "query" not in request.GET:
        return helpers.redirect_with_get_params(
            "components.archival_storage.views.search",
            query="",
            field="",
            type="")

    # get string of URL parameters that should be passed along when paging
    search_params = advanced_search.extract_url_search_params_from_request(
        request)

    current_page_number = int(request.GET.get("page", 1))

    # perform search
    es_client = elasticSearchFunctions.get_client()
    results = None
    query = advanced_search.assemble_query(queries, ops, fields, types)
    try:
        # Use all results to pull transfer facets if not in file mode
        # pulling only one field (we don't need field data as we augment
        # the results using separate queries).
        if not file_mode:
            # Fetch all unique AIP UUIDs in the returned set of files
            # ES query will limit to 10 aggregation results by default,
            # add size parameter in terms to override.
            # TODO: Use composite aggregation when it gets out of beta.
            query["aggs"] = {
                "aip_uuids": {
                    "terms": {
                        "field": "AIPUUID",
                        "size": "10000"
                    }
                }
            }
            # Don't return results, just the aggregation
            query["size"] = 0
            # Searching for AIPs still actually searches type 'aipfile', and
            # returns the UUID of the AIP the files are a part of.  To search
            # for an attribute of an AIP, the aipfile must index that
            # information about their AIP.
            results = es_client.search(body=query, index="aipfiles")
            # Given these AIP UUIDs, now fetch the actual information we want from aips/aip
            buckets = results["aggregations"]["aip_uuids"]["buckets"]
            uuids = [bucket["key"] for bucket in buckets]
            uuid_file_counts = {
                bucket["key"]: bucket["doc_count"]
                for bucket in buckets
            }
            query = {"query": {"terms": {"uuid": uuids}}}
            index = "aips"
            fields = (
                "name,uuid,size,created,status,AICID,isPartOf,countAIPsinAIC,encrypted"
            )
            sort = "name.raw:desc"
        else:
            index = "aipfiles"
            fields = "AIPUUID,filePath,FILEUUID,encrypted"
            sort = "sipName.raw:desc"

        # To reduce amount of data fetched from ES, use LazyPagedSequence
        def es_pager(page, page_size):
            """
            Fetch one page of normalized aipfile entries from Elasticsearch.

            :param page: 1-indexed page to fetch
            :param page_size: Number of entries on a page
            :return: List of dicts for each entry with additional information
            """
            start = (page - 1) * page_size
            results = es_client.search(
                body=query,
                from_=start,
                size=page_size,
                index=index,
                _source=fields,
                sort=sort,
            )
            if file_mode:
                return search_augment_file_results(es_client, results)
            else:
                return search_augment_aip_results(results, uuid_file_counts)

        count = es_client.count(index=index, body={"query": query["query"]})["count"]
        results = LazyPagedSequence(es_pager, items_per_page, count)

    except ElasticsearchException:
        logger.exception("Error accessing index.")
        return HttpResponse("Error accessing index.")

    if not file_mode:
        aic_creation_form = forms.CreateAICForm(initial={"results": uuids})
    else:  # if file_mode
        aic_creation_form = None

    page_data = helpers.pager(results, items_per_page, current_page_number)

    return render(
        request,
        "archival_storage/search.html",
        {
            "file_mode": file_mode,
            "show_aics": show_aics,
            "checked_if_in_file_mode": checked_if_in_file_mode,
            "aic_creation_form": aic_creation_form,
            "results": page_data.object_list,
            "search_params": search_params,
            "page": page_data,
        },
    )
Example 12
def transfer_backlog(request, ui):
    """
    AJAX endpoint to query for and return transfer backlog items.
    """
    es_client = elasticSearchFunctions.get_client()

    # Get search parameters from request
    results = None

    # GET params in SIP arrange can control whether files in metadata/ and
    # logs/ are returned. Appraisal tab always hides these dirs and their files
    # (for now).
    backlog_filter = elasticSearchFunctions.BACKLOG_FILTER
    if ui == 'appraisal' or request.GET.get('hidemetadatalogs'):
        backlog_filter = elasticSearchFunctions.BACKLOG_FILTER_NO_MD_LOGS

    if 'query' not in request.GET:
        query = elasticSearchFunctions.MATCH_ALL_QUERY.copy()
        query['filter'] = backlog_filter
    else:
        queries, ops, fields, types = advanced_search.search_parameter_prep(
            request)

        try:
            query = advanced_search.assemble_query(
                es_client,
                queries,
                ops,
                fields,
                types,
                filters=backlog_filter,
            )
        except Exception:
            logger.exception('Error accessing index.')
            return HttpResponse('Error accessing index.')

    # perform search
    try:
        results = elasticSearchFunctions.search_all_results(
            es_client,
            body=query,
            index='transfers',
            doc_type='transferfile',
        )
    except Exception:
        logger.exception('Error accessing index.')
        return HttpResponse('Error accessing index.')

    # Convert results into a more workable form
    results = elasticSearchFunctions.augment_raw_search_results(results)

    # Convert to a form JS can use:
    # [{'name': <filename>,
    #   'properties': {'not_draggable': False}},
    #  {'name': <directory name>,
    #   'properties': {'not_draggable': True, 'object count': 3, 'display_string': '3 objects'},
    #   'children': [
    #    {'name': <filename>,
    #     'properties': {'not_draggable': True}},
    #    {'name': <directory name>,
    #     'children': [...]
    #    }
    #   ]
    #  },
    # ]
    return_list = []
    directory_map = {}
    # _es_results_to_directory_tree requires that paths MUST be sorted
    results.sort(key=lambda x: x['relative_path'])
    for path in results:
        # If a path is in SIPArrange.original_path, then it shouldn't be draggable
        not_draggable = False
        if models.SIPArrange.objects.filter(
                original_path__endswith=path['relative_path']).exists():
            not_draggable = True
        if ui == 'legacy':
            _es_results_to_directory_tree(path['relative_path'],
                                          return_list,
                                          not_draggable=not_draggable)
        else:
            _es_results_to_appraisal_tab_format(path,
                                                directory_map,
                                                return_list,
                                                not_draggable=not_draggable)

    if ui == 'legacy':
        response = return_list
    else:
        response = {
            'formats': [],  # TODO populate this
            'transfers': return_list,
        }

    # return JSON response
    return helpers.json_response(response)
Example 13
def search(request):
    """
    A JSON end point that returns results for various backlog transfers and their files.

    :param request: The Django request object
    :return: A JSON object including required metadata for the datatable and the backlog search results.
    """
    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(
        request)

    file_mode = request.GET.get('file_mode') == 'true'
    page_size = int(request.GET.get('iDisplayLength', 10))
    start = int(request.GET.get('iDisplayStart', 0))

    order_by = get_es_property_from_column_index(
        int(request.GET.get('iSortCol_0', 0)), file_mode)
    sort_direction = request.GET.get('sSortDir_0', 'asc')

    es_client = elasticSearchFunctions.get_client()

    if 'query' not in request.GET:
        queries, ops, fields, types = (['*'], ['or'], [''], ['term'])

    query = advanced_search.assemble_query(
        es_client,
        queries,
        ops,
        fields,
        types,
        search_index='transfers',
        doc_type='transferfile',
        filters={'term': {
            'status': 'backlog'
        }})

    try:
        if file_mode:
            doc_type = 'transferfile'
            source = 'filename,sipuuid,relative_path'
        else:  # Transfer mode
            # Query to transfers/transferfile, but only fetch & aggregate transfer UUIDs
            # Based on transfer UUIDs, query to transfers/transfer
            # ES query will limit to 10 aggregation results by default, add size parameter in terms to override
            # (https://stackoverflow.com/questions/22927098/show-all-elasticsearch-aggregation-results-buckets-and-not-just-10)
            query['aggs'] = {
                'transfer_uuid': {
                    'terms': {
                        'field': 'sipuuid',
                        'size': '10000'
                    }
                }
            }
            hits = es_client.search(
                index='transfers',
                doc_type='transferfile',
                body=query,
                size=0,  # Don't return results, only aggregation
            )
            uuids = [
                x['key']
                for x in hits['aggregations']['transfer_uuid']['buckets']
            ]

            query['query'] = {
                'terms': {
                    'uuid': uuids,
                },
            }
            doc_type = 'transfer'
            source = 'name,uuid,file_count,ingest_date'

        hit_count = es_client.search(index='transfers',
                                     doc_type=doc_type,
                                     body=query,
                                     search_type='count')['hits']['total']
        hits = es_client.search(
            index='transfers',
            doc_type=doc_type,
            body=query,
            from_=start,
            size=page_size,
            sort=order_by + ':' + sort_direction if order_by else '',
            _source=source,
        )

    except Exception:
        err_desc = 'Error accessing transfers index'
        logger.exception(err_desc)
        return HttpResponse(err_desc)

    results = [x['_source'] for x in hits['hits']['hits']]

    return helpers.json_response({
        'iTotalRecords': hit_count,
        'iTotalDisplayRecords': hit_count,
        'sEcho': int(request.GET.get(
            'sEcho',
            0)),  # It was recommended we convert sEcho to int to prevent XSS
        'aaData': results,
    })
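
Note that search_type='count' (used above to compute hit_count) was deprecated in Elasticsearch 2.0; a size=0 search returns the same total without fetching documents, which is what the later revision of this view does:

hit_count = es_client.search(index='transfers', doc_type=doc_type, body=query,
                             size=0)['hits']['total']
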
Example 14
def search(request):
    """
    A JSON end point that returns results for various backlog transfers and their files.

    :param request: The Django request object
    :return: A JSON object including required metadata for the datatable and the backlog search results.
    """
    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(
        request)

    file_mode = request.GET.get("file_mode") == "true"
    page_size = int(request.GET.get("iDisplayLength", 10))
    start = int(request.GET.get("iDisplayStart", 0))

    order_by = get_es_property_from_column_index(
        int(request.GET.get("iSortCol_0", 0)), file_mode)
    sort_direction = request.GET.get("sSortDir_0", "asc")

    es_client = elasticSearchFunctions.get_client()

    if "query" not in request.GET:
        queries, ops, fields, types = (["*"], ["or"], [""], ["term"])

    query = advanced_search.assemble_query(queries,
                                           ops,
                                           fields,
                                           types,
                                           filters=[{
                                               "term": {
                                                   "status": "backlog"
                                               }
                                           }])

    try:
        if file_mode:
            index = "transferfiles"
            source = "filename,sipuuid,relative_path"
        else:
            # Transfer mode:
            # Query to transferfile, but only fetch & aggregate transfer UUIDs.
            # Based on transfer UUIDs, query to transfers.
            # ES query will limit to 10 aggregation results by default,
            # add size parameter in terms to override.
            # TODO: Use composite aggregation when it gets out of beta.
            query["aggs"] = {
                "transfer_uuid": {
                    "terms": {
                        "field": "sipuuid",
                        "size": "10000"
                    }
                }
            }
            hits = es_client.search(
                index="transferfiles",
                body=query,
                size=0,  # Don't return results, only aggregation
            )
            uuids = [
                x["key"]
                for x in hits["aggregations"]["transfer_uuid"]["buckets"]
            ]

            # Recreate query to search over transfers
            query = {"query": {"terms": {"uuid": uuids}}}
            index = "transfers"
            source = "name,uuid,file_count,ingest_date"

        hits = es_client.search(
            index=index,
            body=query,
            from_=start,
            size=page_size,
            sort=order_by + ":" + sort_direction if order_by else "",
            _source=source,
        )
        hit_count = hits["hits"]["total"]

    except Exception:
        err_desc = "Error accessing transfers index"
        logger.exception(err_desc)
        return HttpResponse(err_desc)

    results = [x["_source"] for x in hits["hits"]["hits"]]

    return helpers.json_response({
        "iTotalRecords": hit_count,
        "iTotalDisplayRecords": hit_count,
        "sEcho": int(request.GET.get(
            "sEcho",
            0)),  # It was recommended we convert sEcho to int to prevent XSS
        "aaData": results,
    })