def archival_storage_list_display(request, current_page_number=None):
    form = forms.StorageSearchForm()
    total_size = 0

    # get ElasticSearch stats
    aip_indexed_file_count = advanced_search.indexed_count('aips')

    # get AIPs
    conn = elasticSearchFunctions.connect_and_create_index('aips')
    aipResults = conn.search(pyes.StringQuery('*'), doc_types=['aip'])

    # materialize the result iterator
    aips = [aip for aip in aipResults]

    # handle pagination
    page = helpers.pager(aips, 10, current_page_number)

    sips = []
    for aip in page['objects']:
        sip = {}
        sip['href'] = aip.filePath.replace(AIPSTOREPATH + '/', "AIPsStore/")
        sip['name'] = aip.name
        sip['uuid'] = aip.uuid
        sip['date'] = aip.created

        try:
            size = float(aip.size)
            total_size = total_size + size
            sip['size'] = '{0:.2f} MB'.format(size)
        except (TypeError, ValueError):
            # no parseable size means the AIP's files have been removed
            sip['size'] = 'Removed'

        sips.append(sip)

    order_by = request.GET.get('order_by', 'name')
    sort_by = request.GET.get('sort_by', 'up')

    def sort_aips(sip):
        # sort names case-insensitively; other fields sort on their raw value
        if order_by == 'name':
            return sip['name'].lower()
        return sip[order_by]

    sips = sorted(sips, key=sort_aips)
    if sort_by == 'down':
        sips.reverse()

    total_size = '{0:.2f}'.format(total_size)

    return render(request, 'archival_storage/archival_storage.html', locals())
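# Example request handled by the view above (hypothetical URL routing;
# 'order_by' can be any key of the sip dicts built above, e.g. 'name',
# 'uuid', 'date', or 'size', and 'sort_by' is 'up' or 'down'):
#
#     GET /archival-storage/2/?order_by=date&sort_by=down
#
# sorts the assembled AIP list by creation date, descending, and renders
# page 2 of the results.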
def filter_search_fields(search_fields, index=None, doc_type=None):
    """
    Given search fields which search nested documents with wildcards (such as
    "transferMetadata.*"), returns a list of subfields filtered to contain
    only string-type fields.

    When searching all fields of nested documents of mixed types using
    query_string queries, the query may fail because the way the query string
    is interpreted depends on the type of the field being searched. For
    example, given a nested document containing a string field and a date
    field, a query_string of "foo" would fail when Elasticsearch attempts to
    parse it as a date to match it against the date field.

    This function uses the actual current mapping, so it supports
    automatically-mapped fields.

    Sample input and output, given a nested document containing three fields,
    "Bagging-Date" (date), "Bag-Name" (string), and "Bag-Type" (string):

        ["transferMetadata.*"]
        #=> ["transferMetadata.Bag-Name", "transferMetadata.Bag-Type"]

    :param list search_fields: A list of strings representing nested object
        names.
    :param str index: The name of the search index, used to look up the
        mapping document. If not provided, the original search_fields is
        returned unmodified.
    :param str doc_type: The name of the document type within the search
        index, used to look up the mapping document. If not provided, the
        original search_fields is returned unmodified.
    """
    if index is None or doc_type is None:
        return search_fields

    new_fields = []
    for field in search_fields:
        # Not a wildcard nested document search, so just add to the list as-is
        if not field.endswith('.*'):
            new_fields.append(field)
            continue

        try:
            field_name = field.rsplit('.', 1)[0]
            conn = elasticSearchFunctions.connect_and_create_index(index)
            mapping = elasticSearchFunctions.get_type_mapping(conn, index, doc_type)
            subfields = mapping[doc_type]['properties'][field_name]['properties']
        except KeyError:
            # The requested field doesn't exist in the index, so don't worry
            # about validating subfields
            new_fields.append(field)
        else:
            for subfield, field_properties in subfields.iteritems():
                if field_properties['type'] == 'string':
                    new_fields.append(field_name + '.' + subfield)

    return new_fields
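# Usage sketch for filter_search_fields (hypothetical index and doc_type
# names; assumes a "transferMetadata" nested mapping like the docstring's
# example). Wildcard nested fields expand to their string-typed subfields,
# while plain fields pass through untouched:
#
#     fields = filter_search_fields(
#         ['sipuuid', 'transferMetadata.*'],
#         index='transfers',
#         doc_type='transferfile',
#     )
#     # => ['sipuuid', 'transferMetadata.Bag-Name', 'transferMetadata.Bag-Type']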
def transfer_backlog(request):
    # deal with transfer mode
    file_mode = False
    checked_if_in_file_mode = ''
    if request.GET.get('mode', '') != '':
        file_mode = True
        checked_if_in_file_mode = 'checked'

    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)

    # redirect if no search params have been set
    if 'query' not in request.GET:
        return helpers.redirect_with_get_params(
            'components.ingest.views.transfer_backlog',
            query='',
            field='',
            type=''
        )

    # get string of URL parameters that should be passed along when paging
    search_params = advanced_search.extract_url_search_params_from_request(request)

    # set paging variables
    if not file_mode:
        items_per_page = 10
    else:
        items_per_page = 20
    page = advanced_search.extract_page_number_from_url(request)
    start = page * items_per_page + 1

    # perform search
    conn = elasticSearchFunctions.connect_and_create_index('transfers')
    try:
        query = advanced_search.assemble_query(
            queries,
            ops,
            fields,
            types,
            must_haves=[pyes.TermQuery('status', 'backlog')]
        )

        # use all results to pull transfer facets if not in file mode;
        # otherwise use paged results
        if not file_mode:
            results = conn.search_raw(
                query,
                indices='transfers',
                type='transferfile'
            )
        else:
            results = conn.search_raw(
                query,
                indices='transfers',
                type='transferfile',
                start=start - 1,
                size=items_per_page
            )
    except Exception:
        return HttpResponse('Error accessing index.')

    # take note of facet data
    file_extension_usage = results['facets']['fileExtension']['terms']
    transfer_uuids = results['facets']['sipuuid']['terms']

    if not file_mode:
        # run through transfers to see if they've been created yet
        awaiting_creation = {}
        for transfer_instance in transfer_uuids:
            try:
                awaiting_creation[transfer_instance.term] = transfer_awaiting_sip_creation_v2(transfer_instance.term)
                transfer = models.Transfer.objects.get(uuid=transfer_instance.term)
                transfer_basename = os.path.basename(transfer.currentlocation[:-1])
                # strip the trailing '-<UUID>' (hyphen plus 36 characters)
                # from the transfer directory name
                transfer_instance.name = transfer_basename[:-37]
                transfer_instance.type = transfer.type
                if transfer.accessionid is not None:
                    transfer_instance.accession = transfer.accessionid
                else:
                    transfer_instance.accession = ''
            except Exception:
                awaiting_creation[transfer_instance.term] = False

        # page data
        number_of_results = len(transfer_uuids)
        page_data = helpers.pager(transfer_uuids, items_per_page, page + 1)
        transfer_uuids = page_data['objects']
    else:
        # page data
        number_of_results = results.hits.total
        results = transfer_backlog_augment_search_results(results)

    # set remaining paging variables
    end, previous_page, next_page = advanced_search.paging_related_values_for_template_use(
        items_per_page,
        page,
        start,
        number_of_results
    )

    # make sure results is set
    try:
        if results:
            pass
    except NameError:
        results = False

    form = StorageSearchForm(initial={'query': queries[0]})

    return render(request, 'ingest/backlog/search.html', locals())
def list_display(request):
    current_page_number = request.GET.get('page', 1)
    form = forms.StorageSearchForm()

    # get ElasticSearch stats
    aip_indexed_file_count = advanced_search.indexed_count('aips')

    # get AIPs
    order_by = request.GET.get('order_by', 'name')
    sort_by = request.GET.get('sort_by', 'up')
    if sort_by == 'down':
        sort_direction = 'desc'
    else:
        sort_direction = 'asc'
    sort_specification = order_by + ':' + sort_direction

    conn = elasticSearchFunctions.connect_and_create_index('aips')

    items_per_page = 10
    start = (int(current_page_number) - 1) * items_per_page

    aipResults = conn.search(
        pyes.Search(pyes.MatchAllQuery(), start=start, size=items_per_page),
        doc_types=['aip'],
        fields='origin,uuid,filePath,created,name,size',
        sort=sort_specification
    )

    try:
        len(aipResults)
    except pyes.exceptions.ElasticSearchException:
        # there will be an error if no mapping exists for AIPs due to no AIPs
        # having been created
        return render(request, 'archival_storage/archival_storage.html', locals())

    # handle pagination
    page = helpers.pager(aipResults, items_per_page, current_page_number)
    if not page:
        raise Http404

    # augment data
    sips = []
    for aip in page['objects']:
        sip = {}
        sip['href'] = aip.filePath.replace(AIPSTOREPATH + '/', "AIPsStore/")
        sip['name'] = aip.name
        sip['uuid'] = aip.uuid
        sip['date'] = aip.created

        try:
            size = float(aip.size)
            sip['size'] = '{0:.2f} MB'.format(size)
        except (TypeError, ValueError):
            sip['size'] = 'Removed'

        sips.append(sip)

    # get total size of all AIPs from ElasticSearch
    q = pyes.MatchAllQuery().search()
    q.facet.add(pyes.facets.StatisticalFacet('total', field='size'))
    aipResults = conn.search(q, doc_types=['aip'])
    total_size = aipResults.facets.total.total
    total_size = '{0:.2f}'.format(total_size)

    return render(request, 'archival_storage/archival_storage.html', locals())
def transfer_backlog(request):
    """
    AJAX endpoint to query for and return transfer backlog items.
    """
    # Get search parameters from request
    results = None
    conn = elasticSearchFunctions.connect_and_create_index('transfers')

    if 'query' not in request.GET:
        query = elasticSearchFunctions.MATCH_ALL_QUERY
    else:
        queries, ops, fields, types = advanced_search.search_parameter_prep(request)

        try:
            query = advanced_search.assemble_query(
                queries,
                ops,
                fields,
                types,
                # Specify this as a filter, not a must_have, for performance,
                # and so that it doesn't cause the "should" queries in a
                # should-only query to be ignored.
                filters={'term': {'status': 'backlog'}},
            )
        except Exception:
            logger.exception('Error accessing index.')
            return HttpResponse('Error accessing index.')

    # perform search
    try:
        results = elasticSearchFunctions.search_all_results(
            conn,
            body=query,
            index='transfers',
            doc_type='transferfile',
        )
    except Exception:
        logger.exception('Error accessing index.')
        return HttpResponse('Error accessing index.')

    # Convert results into a more workable form
    results = _transfer_backlog_augment_search_results(results)

    # Convert to a form JS can use:
    # [{'name': <filename>,
    #   'properties': {'not_draggable': False}},
    #  {'name': <directory name>,
    #   'properties': {'not_draggable': True, 'object count': 3, 'display_string': '3 objects'},
    #   'children': [
    #     {'name': <filename>,
    #      'properties': {'not_draggable': True}},
    #     {'name': <directory name>,
    #      'children': [...]}
    #   ]},
    # ]
    return_list = []
    # _es_results_to_directory_tree requires that paths MUST be sorted
    results.sort(key=lambda x: x['relative_path'])
    for path in results:
        # If a path is in SIPArrange.original_path, then it shouldn't be draggable
        not_draggable = False
        if models.SIPArrange.objects.filter(
                original_path__endswith=path['relative_path']).exists():
            not_draggable = True
        _es_results_to_directory_tree(path['relative_path'], return_list, not_draggable=not_draggable)

    # return JSON response
    return helpers.json_response(return_list)
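# A minimal sketch (not the actual _es_results_to_directory_tree helper) of
# how a flat list of relative paths folds into the nested node structure
# described in the comment above. This dict-indexed variant tolerates any
# input order, but sorted input keeps each directory's children contiguous
# in the output, mirroring the sorting contract noted above.
def _sketch_paths_to_tree(sorted_paths):
    root = []   # top-level list of nodes
    index = {}  # directory prefix -> that directory's 'children' list
    for path in sorted_paths:
        parts = path.split('/')
        siblings = root
        prefix = ''
        # walk (and create as needed) each directory component of the path
        for part in parts[:-1]:
            prefix += part + '/'
            if prefix not in index:
                node = {'name': part, 'children': []}
                siblings.append(node)
                index[prefix] = node['children']
            siblings = index[prefix]
        # the final component is a file node
        siblings.append({'name': parts[-1]})
    return root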
def list_display(request):
    current_page_number = int(request.GET.get('page', 1))
    logger.debug('Current page: %s', current_page_number)

    # get count of AIP files
    aip_indexed_file_count = aip_file_count()

    # get AIPs
    order_by = request.GET.get('order_by', 'name_unanalyzed')
    sort_by = request.GET.get('sort_by', 'up')
    if sort_by == 'down':
        sort_direction = 'desc'
    else:
        sort_direction = 'asc'
    sort_specification = order_by + ':' + sort_direction
    sort_params = 'order_by=' + order_by + '&sort_by=' + sort_by

    conn = elasticSearchFunctions.connect_and_create_index('aips')

    # get list of UUIDs of AIPs that are deleted or pending deletion
    aips_deleted_or_pending_deletion = []
    should_haves = [
        {'match': {'status': 'DEL_REQ'}},
        {'match': {'status': 'DELETED'}},
    ]
    query = {
        "query": {
            "bool": {
                "should": should_haves
            }
        }
    }
    deleted_aip_results = conn.search(
        body=query,
        index='aips',
        doc_type='aip',
        fields='uuid,status'
    )
    for deleted_aip in deleted_aip_results['hits']['hits']:
        aips_deleted_or_pending_deletion.append(deleted_aip['fields']['uuid'][0])

    # Fetch results and paginate
    def es_pager(page, page_size):
        """
        Fetch one page of normalized entries from Elasticsearch.

        :param page: 1-indexed page to fetch
        :param page_size: Number of entries on a page
        :return: List of dicts for each entry, where keys and values have
            been cleaned up
        """
        start = (page - 1) * page_size
        results = conn.search(
            index='aips',
            doc_type='aip',
            body=elasticSearchFunctions.MATCH_ALL_QUERY,
            fields='origin,uuid,filePath,created,name,size',
            sort=sort_specification,
            size=page_size,
            from_=start,
        )
        # normalize results - each of the fields contains a single value,
        # but is returned from the ES API as a single-length array
        # e.g. {"fields": {"uuid": ["abcd"], "name": ["aip"], ...}}
        return [elasticSearchFunctions.normalize_results_dict(d) for d in results['hits']['hits']]

    items_per_page = 10
    count = conn.count(
        index='aips',
        doc_type='aip',
        body=elasticSearchFunctions.MATCH_ALL_QUERY
    )['count']
    results = LazyPagedSequence(es_pager, page_size=items_per_page, length=count)

    # Paginate
    page = helpers.pager(results, items_per_page, current_page_number)

    # process deletion, etc., and format results
    aips = []
    for aip in page.object_list:
        # If an AIP was deleted or is pending deletion, react if status changed
        if aip['uuid'] in aips_deleted_or_pending_deletion:
            # check with storage server to see current status
            api_results = storage_service.get_file_info(uuid=aip['uuid'])
            try:
                aip_status = api_results[0]['status']
            except IndexError:
                # Storage service does not know about this AIP
                # TODO what should happen here?
                logger.info("AIP not found in storage service: {}".format(aip))
                continue

            # delete AIP metadata in ElasticSearch if AIP has been deleted
            # from the storage server
            # TODO: handle this asynchronously
            if aip_status == 'DELETED':
                elasticSearchFunctions.delete_aip(aip['uuid'])
                elasticSearchFunctions.connect_and_delete_aip_files(aip['uuid'])
            elif aip_status != 'DEL_REQ':
                # update the status in ElasticSearch for this AIP
                elasticSearchFunctions.connect_and_mark_stored(aip['uuid'])
        else:
            aip_status = 'UPLOADED'

        # Tweak AIP presentation and add to display array
        if aip_status != 'DELETED':
            aip['status'] = AIP_STATUS_DESCRIPTIONS[aip_status]

            try:
                size = '{0:.2f} MB'.format(float(aip['size']))
            except (TypeError, ValueError):
                size = 'Removed'
            aip['size'] = size

            aip['href'] = aip['filePath'].replace(AIPSTOREPATH + '/', "AIPsStore/")
            aip['date'] = aip['created']

            aips.append(aip)

    total_size = total_size_of_aips(conn)

    return render(request, 'archival_storage/archival_storage.html',
        {
            'total_size': total_size,
            'aip_indexed_file_count': aip_indexed_file_count,
            'aips': aips,
            'page': page,
            'search_params': sort_params,
        }
    )
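# A minimal sketch of the lazy-pagination pattern used above (a hypothetical
# stand-in, not the project's actual LazyPagedSequence class): the sequence
# reports its full length up front but defers to the page-fetching callable
# only when items are actually accessed, so rendering one page of ten AIPs
# costs a single Elasticsearch request rather than fetching the whole index.
class _SketchLazyPagedSequence(object):
    def __init__(self, fetch_page, page_size, length):
        self.fetch_page = fetch_page  # callable(page, page_size) -> list of items
        self.page_size = page_size
        self.length = length

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        if isinstance(index, slice):
            start = index.start or 0
            stop = min(index.stop or self.length, self.length)
            return [self[i] for i in range(start, stop)]
        # translate a flat index into a 1-indexed page and an offset within it
        # (a real implementation would cache the most recently fetched page)
        page, offset = divmod(index, self.page_size)
        return self.fetch_page(page + 1, self.page_size)[offset]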