def archival_storage_list_display(request, current_page_number=None):
    form = forms.StorageSearchForm()
    total_size = 0

    # get ElasticSearch stats
    aip_indexed_file_count = advanced_search.indexed_count('aips')

    # get AIPs
    conn = elasticSearchFunctions.connect_and_create_index('aips')
    aipResults = conn.search(pyes.StringQuery('*'), doc_types=['aip'])

    # materialize the result iterator
    aips = [aip for aip in aipResults]

    # handle pagination
    page = helpers.pager(aips, 10, current_page_number)

    sips = []
    for aip in page['objects']:
        sip = {}
        sip['href'] = aip.filePath.replace(AIPSTOREPATH + '/', "AIPsStore/")
        sip['name'] = aip.name
        sip['uuid'] = aip.uuid
        sip['date'] = aip.created

        try:
            size = float(aip.size)
            total_size = total_size + size
            sip['size'] = '{0:.2f} MB'.format(size)
        except (TypeError, ValueError):
            # no parseable size means the AIP's files have been removed
            sip['size'] = 'Removed'

        sips.append(sip)

    order_by = request.GET.get('order_by', 'name')
    sort_by = request.GET.get('sort_by', 'up')

    def sort_aips(sip):
        # sort names case-insensitively; other fields sort on their raw value
        if order_by == 'name':
            return sip['name'].lower()
        return sip[order_by]

    sips = sorted(sips, key=sort_aips)
    if sort_by == 'down':
        sips.reverse()

    total_size = '{0:.2f}'.format(total_size)

    return render(request, 'archival_storage/archival_storage.html', locals())
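# Example request handled by the view above (hypothetical URL routing;
# 'order_by' can be any key of the sip dicts built above, e.g. 'name',
# 'uuid', 'date', or 'size', and 'sort_by' is 'up' or 'down'):
#
#     GET /archival-storage/2/?order_by=date&sort_by=down
#
# sorts the assembled AIP list by creation date, descending, and renders
# page 2 of the results.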
def filter_search_fields(search_fields, index=None, doc_type=None):
    """
    Given search fields which search nested documents with wildcards (such as
    "transferMetadata.*"), returns a list of subfields filtered to contain
    only string-type fields.

    When searching all fields of nested documents of mixed types using
    query_string queries, the query may fail because the way the query string
    is interpreted depends on the type of the field being searched. For
    example, given a nested document containing a string field and a date
    field, a query_string of "foo" would fail when Elasticsearch attempts to
    parse it as a date to match it against the date field.

    This function uses the actual current mapping, so it supports
    automatically-mapped fields.

    Sample input and output, given a nested document containing three fields,
    "Bagging-Date" (date), "Bag-Name" (string), and "Bag-Type" (string):

        ["transferMetadata.*"]
        #=> ["transferMetadata.Bag-Name", "transferMetadata.Bag-Type"]

    :param list search_fields: A list of strings representing nested object
        names.
    :param str index: The name of the search index, used to look up the
        mapping document. If not provided, the original search_fields is
        returned unmodified.
    :param str doc_type: The name of the document type within the search
        index, used to look up the mapping document. If not provided, the
        original search_fields is returned unmodified.
    """
    if index is None or doc_type is None:
        return search_fields

    new_fields = []
    for field in search_fields:
        # Not a wildcard nested document search, so just add to the list as-is
        if not field.endswith('.*'):
            new_fields.append(field)
            continue

        try:
            field_name = field.rsplit('.', 1)[0]
            conn = elasticSearchFunctions.connect_and_create_index(index)
            mapping = elasticSearchFunctions.get_type_mapping(conn, index, doc_type)
            subfields = mapping[doc_type]['properties'][field_name]['properties']
        except KeyError:
            # The requested field doesn't exist in the index, so don't worry
            # about validating subfields
            new_fields.append(field)
        else:
            for subfield, field_properties in subfields.iteritems():
                if field_properties['type'] == 'string':
                    new_fields.append(field_name + '.' + subfield)

    return new_fields
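# Usage sketch for filter_search_fields (hypothetical index and doc_type
# names; assumes a "transferMetadata" nested mapping like the docstring's
# example). Wildcard nested fields expand to their string-typed subfields,
# while plain fields pass through untouched:
#
#     fields = filter_search_fields(
#         ['sipuuid', 'transferMetadata.*'],
#         index='transfers',
#         doc_type='transferfile',
#     )
#     # => ['sipuuid', 'transferMetadata.Bag-Name', 'transferMetadata.Bag-Type']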
def transfer_backlog(request):
    # deal with transfer mode
    file_mode = False
    checked_if_in_file_mode = ''
    if request.GET.get('mode', '') != '':
        file_mode = True
        checked_if_in_file_mode = 'checked'

    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)

    # redirect if no search params have been set
    if 'query' not in request.GET:
        return helpers.redirect_with_get_params(
            'components.ingest.views.transfer_backlog',
            query='',
            field='',
            type=''
        )

    # get string of URL parameters that should be passed along when paging
    search_params = advanced_search.extract_url_search_params_from_request(request)

    # set paging variables
    if not file_mode:
        items_per_page = 10
    else:
        items_per_page = 20
    page = advanced_search.extract_page_number_from_url(request)
    start = page * items_per_page + 1

    # perform search
    conn = elasticSearchFunctions.connect_and_create_index('transfers')
    try:
        query = advanced_search.assemble_query(
            queries,
            ops,
            fields,
            types,
            must_haves=[pyes.TermQuery('status', 'backlog')]
        )

        # use all results to pull transfer facets if not in file mode;
        # otherwise use paged results
        if not file_mode:
            results = conn.search_raw(
                query,
                indices='transfers',
                type='transferfile'
            )
        else:
            results = conn.search_raw(
                query,
                indices='transfers',
                type='transferfile',
                start=start - 1,
                size=items_per_page
            )
    except Exception:
        return HttpResponse('Error accessing index.')

    # take note of facet data
    file_extension_usage = results['facets']['fileExtension']['terms']
    transfer_uuids = results['facets']['sipuuid']['terms']

    if not file_mode:
        # run through transfers to see if they've been created yet
        awaiting_creation = {}
        for transfer_instance in transfer_uuids:
            try:
                awaiting_creation[transfer_instance.term] = transfer_awaiting_sip_creation_v2(transfer_instance.term)
                transfer = models.Transfer.objects.get(uuid=transfer_instance.term)
                transfer_basename = os.path.basename(transfer.currentlocation[:-1])
                # strip the trailing '-<UUID>' (hyphen plus 36 characters)
                # from the transfer directory name
                transfer_instance.name = transfer_basename[:-37]
                transfer_instance.type = transfer.type
                if transfer.accessionid is not None:
                    transfer_instance.accession = transfer.accessionid
                else:
                    transfer_instance.accession = ''
            except Exception:
                awaiting_creation[transfer_instance.term] = False

        # page data
        number_of_results = len(transfer_uuids)
        page_data = helpers.pager(transfer_uuids, items_per_page, page + 1)
        transfer_uuids = page_data['objects']
    else:
        # page data
        number_of_results = results.hits.total
        results = transfer_backlog_augment_search_results(results)

    # set remaining paging variables
    end, previous_page, next_page = advanced_search.paging_related_values_for_template_use(
        items_per_page,
        page,
        start,
        number_of_results
    )

    # make sure results is set
    try:
        if results:
            pass
    except NameError:
        results = False

    form = StorageSearchForm(initial={'query': queries[0]})

    return render(request, 'ingest/backlog/search.html', locals())
def list_display(request):
    current_page_number = request.GET.get('page', 1)
    form = forms.StorageSearchForm()

    # get ElasticSearch stats
    aip_indexed_file_count = advanced_search.indexed_count('aips')

    # get AIPs
    order_by = request.GET.get('order_by', 'name')
    sort_by = request.GET.get('sort_by', 'up')
    if sort_by == 'down':
        sort_direction = 'desc'
    else:
        sort_direction = 'asc'
    sort_specification = order_by + ':' + sort_direction

    conn = elasticSearchFunctions.connect_and_create_index('aips')

    items_per_page = 10
    start = (int(current_page_number) - 1) * items_per_page

    aipResults = conn.search(
        pyes.Search(pyes.MatchAllQuery(), start=start, size=items_per_page),
        doc_types=['aip'],
        fields='origin,uuid,filePath,created,name,size',
        sort=sort_specification
    )

    try:
        len(aipResults)
    except pyes.exceptions.ElasticSearchException:
        # there will be an error if no mapping exists for AIPs due to no AIPs
        # having been created
        return render(request, 'archival_storage/archival_storage.html', locals())

    # handle pagination
    page = helpers.pager(aipResults, items_per_page, current_page_number)
    if not page:
        raise Http404

    # augment data
    sips = []
    for aip in page['objects']:
        sip = {}
        sip['href'] = aip.filePath.replace(AIPSTOREPATH + '/', "AIPsStore/")
        sip['name'] = aip.name
        sip['uuid'] = aip.uuid
        sip['date'] = aip.created

        try:
            size = float(aip.size)
            sip['size'] = '{0:.2f} MB'.format(size)
        except (TypeError, ValueError):
            sip['size'] = 'Removed'

        sips.append(sip)

    # get total size of all AIPs from ElasticSearch
    q = pyes.MatchAllQuery().search()
    q.facet.add(pyes.facets.StatisticalFacet('total', field='size'))
    aipResults = conn.search(q, doc_types=['aip'])
    total_size = aipResults.facets.total.total
    total_size = '{0:.2f}'.format(total_size)

    return render(request, 'archival_storage/archival_storage.html', locals())
def transfer_backlog(request):
    """
    AJAX endpoint to query for and return transfer backlog items.
    """
    # Get search parameters from request
    results = None
    conn = elasticSearchFunctions.connect_and_create_index('transfers')

    if 'query' not in request.GET:
        query = elasticSearchFunctions.MATCH_ALL_QUERY
    else:
        queries, ops, fields, types = advanced_search.search_parameter_prep(request)

        try:
            query = advanced_search.assemble_query(
                queries,
                ops,
                fields,
                types,
                # Specify this as a filter, not a must_have, for performance,
                # and so that it doesn't cause the "should" queries in a
                # should-only query to be ignored.
                filters={'term': {'status': 'backlog'}},
            )
        except Exception:
            logger.exception('Error accessing index.')
            return HttpResponse('Error accessing index.')

    # perform search
    try:
        results = elasticSearchFunctions.search_all_results(
            conn,
            body=query,
            index='transfers',
            doc_type='transferfile',
        )
    except Exception:
        logger.exception('Error accessing index.')
        return HttpResponse('Error accessing index.')

    # Convert results into a more workable form
    results = _transfer_backlog_augment_search_results(results)

    # Convert to a form JS can use:
    # [{'name': <filename>,
    #   'properties': {'not_draggable': False}},
    #  {'name': <directory name>,
    #   'properties': {'not_draggable': True, 'object count': 3, 'display_string': '3 objects'},
    #   'children': [
    #     {'name': <filename>,
    #      'properties': {'not_draggable': True}},
    #     {'name': <directory name>,
    #      'children': [...]}
    #   ]},
    # ]
    return_list = []
    # _es_results_to_directory_tree requires that paths MUST be sorted
    results.sort(key=lambda x: x['relative_path'])
    for path in results:
        # If a path is in SIPArrange.original_path, then it shouldn't be draggable
        not_draggable = False
        if models.SIPArrange.objects.filter(
                original_path__endswith=path['relative_path']).exists():
            not_draggable = True
        _es_results_to_directory_tree(path['relative_path'], return_list, not_draggable=not_draggable)

    # return JSON response
    return helpers.json_response(return_list)
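# A minimal sketch (not the actual _es_results_to_directory_tree helper) of
# how a flat list of relative paths folds into the nested node structure
# described in the comment above. This dict-indexed variant tolerates any
# input order, but sorted input keeps each directory's children contiguous
# in the output, mirroring the sorting contract noted above.
def _sketch_paths_to_tree(sorted_paths):
    root = []   # top-level list of nodes
    index = {}  # directory prefix -> that directory's 'children' list
    for path in sorted_paths:
        parts = path.split('/')
        siblings = root
        prefix = ''
        # walk (and create as needed) each directory component of the path
        for part in parts[:-1]:
            prefix += part + '/'
            if prefix not in index:
                node = {'name': part, 'children': []}
                siblings.append(node)
                index[prefix] = node['children']
            siblings = index[prefix]
        # the final component is a file node
        siblings.append({'name': parts[-1]})
    return root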
def list_display(request):
    current_page_number = int(request.GET.get('page', 1))
    logger.debug('Current page: %s', current_page_number)

    # get count of AIP files
    aip_indexed_file_count = aip_file_count()

    # get AIPs
    order_by = request.GET.get('order_by', 'name_unanalyzed')
    sort_by = request.GET.get('sort_by', 'up')
    if sort_by == 'down':
        sort_direction = 'desc'
    else:
        sort_direction = 'asc'
    sort_specification = order_by + ':' + sort_direction
    sort_params = 'order_by=' + order_by + '&sort_by=' + sort_by

    conn = elasticSearchFunctions.connect_and_create_index('aips')

    # get list of UUIDs of AIPs that are deleted or pending deletion
    aips_deleted_or_pending_deletion = []
    should_haves = [
        {'match': {'status': 'DEL_REQ'}},
        {'match': {'status': 'DELETED'}},
    ]
    query = {
        "query": {
            "bool": {
                "should": should_haves
            }
        }
    }
    deleted_aip_results = conn.search(
        body=query,
        index='aips',
        doc_type='aip',
        fields='uuid,status'
    )
    for deleted_aip in deleted_aip_results['hits']['hits']:
        aips_deleted_or_pending_deletion.append(deleted_aip['fields']['uuid'][0])

    # Fetch results and paginate
    def es_pager(page, page_size):
        """
        Fetch one page of normalized entries from Elasticsearch.

        :param page: 1-indexed page to fetch
        :param page_size: Number of entries on a page
        :return: List of dicts for each entry, where keys and values have
            been cleaned up
        """
        start = (page - 1) * page_size
        results = conn.search(
            index='aips',
            doc_type='aip',
            body=elasticSearchFunctions.MATCH_ALL_QUERY,
            fields='origin,uuid,filePath,created,name,size',
            sort=sort_specification,
            size=page_size,
            from_=start,
        )
        # normalize results - each of the fields contains a single value,
        # but is returned from the ES API as a single-length array
        # e.g. {"fields": {"uuid": ["abcd"], "name": ["aip"], ...}}
        return [elasticSearchFunctions.normalize_results_dict(d) for d in results['hits']['hits']]

    items_per_page = 10
    count = conn.count(
        index='aips',
        doc_type='aip',
        body=elasticSearchFunctions.MATCH_ALL_QUERY
    )['count']
    results = LazyPagedSequence(es_pager, page_size=items_per_page, length=count)

    # Paginate
    page = helpers.pager(results, items_per_page, current_page_number)

    # process deletion, etc., and format results
    aips = []
    for aip in page.object_list:
        # If an AIP was deleted or is pending deletion, react if status changed
        if aip['uuid'] in aips_deleted_or_pending_deletion:
            # check with storage server to see current status
            api_results = storage_service.get_file_info(uuid=aip['uuid'])
            try:
                aip_status = api_results[0]['status']
            except IndexError:
                # Storage service does not know about this AIP
                # TODO what should happen here?
                logger.info("AIP not found in storage service: {}".format(aip))
                continue

            # delete AIP metadata in ElasticSearch if AIP has been deleted
            # from the storage server
            # TODO: handle this asynchronously
            if aip_status == 'DELETED':
                elasticSearchFunctions.delete_aip(aip['uuid'])
                elasticSearchFunctions.connect_and_delete_aip_files(aip['uuid'])
            elif aip_status != 'DEL_REQ':
                # update the status in ElasticSearch for this AIP
                elasticSearchFunctions.connect_and_mark_stored(aip['uuid'])
        else:
            aip_status = 'UPLOADED'

        # Tweak AIP presentation and add to display array
        if aip_status != 'DELETED':
            aip['status'] = AIP_STATUS_DESCRIPTIONS[aip_status]

            try:
                size = '{0:.2f} MB'.format(float(aip['size']))
            except (TypeError, ValueError):
                size = 'Removed'
            aip['size'] = size

            aip['href'] = aip['filePath'].replace(AIPSTOREPATH + '/', "AIPsStore/")
            aip['date'] = aip['created']

            aips.append(aip)

    total_size = total_size_of_aips(conn)

    return render(request, 'archival_storage/archival_storage.html',
        {
            'total_size': total_size,
            'aip_indexed_file_count': aip_indexed_file_count,
            'aips': aips,
            'page': page,
            'search_params': sort_params,
        }
    )
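# A minimal sketch of the lazy-pagination pattern used above (a hypothetical
# stand-in, not the project's actual LazyPagedSequence class): the sequence
# reports its full length up front but defers to the page-fetching callable
# only when items are actually accessed, so rendering one page of ten AIPs
# costs a single Elasticsearch request rather than fetching the whole index.
class _SketchLazyPagedSequence(object):
    def __init__(self, fetch_page, page_size, length):
        self.fetch_page = fetch_page  # callable(page, page_size) -> list of items
        self.page_size = page_size
        self.length = length

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        if isinstance(index, slice):
            start = index.start or 0
            stop = min(index.stop or self.length, self.length)
            return [self[i] for i in range(start, stop)]
        # translate a flat index into a 1-indexed page and an offset within it
        # (a real implementation would cache the most recently fetched page)
        page, offset = divmod(index, self.page_size)
        return self.fetch_page(page + 1, self.page_size)[offset]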