Esempio n. 1
0
def videos(request):
    """Render the video exploration page.

    Runs a wildcard query on the ``number`` field (pattern ``GPH_3DP*``)
    against the ``videos`` doc type, requesting up to 500 documents sorted by
    ``displaytext``, and hands the ``_source`` of every hit to the
    ``search/explorevideos.html`` template under the ``results`` key.
    """
    query_body = {
        "size": 500,
        "query": {
            "wildcard": {
                "number": "GPH_3DP*"
            }
        },
        "sort": "displaytext"
    }
    response = es.search(index=ES_INDEX, doc_type='videos', body=query_body)
    # Only the stored document is needed by the template, not the ES metadata.
    video_sources = [entry['_source'] for entry in response['hits']['hits']]
    context = {'results': video_sources}
    return render(request, 'search/explorevideos.html', context)
Esempio n. 2
0
def library(request):
    """Render ``search/library.html`` grouped according to the ``sort`` parameter.

    * ``sort=name`` (the default): fetches up to 500 ``library`` documents
      sorted by name and groups consecutive documents by the first character
      of their lower-cased name. ``results`` is a list of single-key dicts
      ``{letter: [source, ...]}``; the list always starts with a group for
      ``'a'``, which may be empty.
    * any other value: aggregates ``pubdocs`` on ``<sort>.raw`` and keeps, per
      bucket, the top hits that have a non-empty ``pdf``. ``results`` is a
      list of ``{bucket_key: [{'docs': [...]}]}``; buckets without any such
      document are left out.
    """
    sort_field = request.GET.get('sort', 'name').encode('utf-8')
    grouped = []

    if sort_field != 'name':
        # year, format - TODO: title
        response = es.search(index=ES_INDEX, doc_type='pubdocs', body={
            "size": 0,
            "query": {
                "match_all": {}
            },
            "aggs": {
                "by_sort": {
                    "terms": {
                        "field": sort_field + ".raw",
                        "order": {
                            "_term": "asc"
                        },
                        "size": 500
                    },
                    "aggs": {
                        "by_top_hit": {
                            "top_hits": {
                                "size": 100
                            }
                        }
                    }
                }
            }
        })
        for bucket in response['aggregations']['by_sort']['buckets']:
            bucket_key = bucket['key']
            pdf_docs = []
            for top_hit in bucket['by_top_hit']['hits']['hits']:
                doc = top_hit['_source']
                if 'pdf' in doc and doc['pdf'] != '':
                    pdf_docs.append({
                        'url': doc['pdf'],
                        'displaytext': doc['boilertext'],
                        'format': doc['format']
                    })
            # Buckets with no downloadable document are not shown.
            if pdf_docs:
                grouped.append({bucket_key: [{'docs': pdf_docs}]})
    else:
        response = es.search(index=ES_INDEX, doc_type='library', body={
            "size": 500,
            "sort": sort_field,
            "query": {
                "match_all": {}
            }
        })
        letter = 'a'
        members = []
        # The first group is registered up front, so it is present even when
        # no name starts with 'a'.
        grouped.append({letter: members})
        for entry in response['hits']['hits']:
            doc = entry['_source']
            lowered_name = doc['name'].encode('utf-8').lower()
            if not lowered_name.startswith(letter):
                # A new initial: open a fresh group for it.
                letter = lowered_name[0]
                members = []
                grouped.append({letter: members})
            members.append(doc)

    return render(request, 'search/library.html', {
        'results': grouped,
        'sort': sort_field
    })
Esempio n. 3
0
def results(request):
    """Run a site search and render ``search/results.html``.

    GET parameters read by this view:

    * ``q`` - keyword search term. A value containing exactly one colon
      (``<type>:<number>``) is treated as a lookup of one item by number.
    * ``sort`` - passed through unchanged as the Elasticsearch ``sort``
      (default ``_score``).
    * ``category`` - the currently selected category, if any.
    * ``<category>_facet`` - repeatable; each value is ``<subfacet>_<term>``.
    * ``<category>_<field>`` - field-specific search values.
    * ``page`` - 1-based page number; missing, malformed or non-positive
      values fall back to page 1.

    Returns the rendered template, or a redirect to the item's ``full`` view
    (``get_type_html``) when a number lookup matches exactly one document.
    """
    search_term = request.GET.get('q', '').encode('utf-8')
    sort = request.GET.get('sort', '_score').encode('utf-8')
    current_category = request.GET.get('category', '').encode('utf-8')
    current_subfacets = {}
    fields = {}
    if current_category:
        # check if there are subfacets for the currently selected category
        for sc in request.GET.getlist(current_category+'_facet', []):
            # Split on the first underscore only, so a term that itself
            # contains underscores is kept whole.
            subfacet, separator, term = sc.partition('_')
            if not separator:
                # Malformed value without "<subfacet>_<term>" shape: ignore it
                # rather than failing the whole request.
                continue
            category_facets = current_subfacets.setdefault(current_category, {})
            category_facets.setdefault(subfacet, []).append(term)

        # check if we have a field-specific search
        for key in request.GET:
            if key.startswith(current_category) and not key.endswith('_facet'):
                parts = key.split('_')
                if len(parts) < 2:
                    # Key has no "_<field>" suffix, so there is no field name.
                    continue
                fields[parts[1]] = request.GET.get(key, '').encode('utf-8')

    # The page number comes from the query string, so it may be garbage.
    try:
        page = int(request.GET.get('page', 1))
    except (TypeError, ValueError):
        page = 1
    page = max(page, 1)
    # calculate elasticsearch's from, using the page value
    results_from = (page - 1) * RESULTS_SIZE

    # check if user is trying to search by specific item number
    number = None
    categorystring = ""
    parts = search_term.split(':')
    number_query = len(parts) == 2
    if number_query:
        categorystring, number = parts

    # values being passed to template
    hits = []
    all_categories = {}
    sub_facets = {}
    has_next = False
    has_previous = False
    previous_page_number = 0
    next_page_number = 0
    num_pages_range = []
    num_pages = 0
    total = 0

    if number_query:
        # user is searching for an exact item using its number
        # such as 'objects:HUMFA_27-5-1'
        search_results = es.search(index=ES_INDEX, doc_type=categorystring, body={
            "query": {
                "match": {
                    "allnumbers": number
                }
            }
        })
        results_total = search_results['hits']['total']
        # user entered a number that only has one result for the given type, redirect to that page
        if results_total == 1:
            source = search_results['hits']['hits'][0].get('_source')
            return HttpResponseRedirect(reverse('get_type_html', args=(categorystring, source.get('id'), 'full')))
        # 0 or more than 1 result: list whatever came back like a normal
        # result page.
        # NOTE(review): `total` and the pagination values keep their zero
        # defaults on this path even when hits are listed - confirm the
        # template copes with that.
        for hit in search_results['hits']['hits']:
            hits.append({'id' : hit.get('_id'), 'type' : hit.get('_type'), 'source' : hit.get('_source')})
    else:
        # this is a normal search
        base_query = build_es_query(search_term, fields)
        bool_filter = build_bool(current_category, current_subfacets, '')
        subfacet_aggs = build_subfacet_aggs(current_category, current_subfacets, bool_filter)

        # First request: no documents (size 0), only the sub-facet
        # aggregations for the selected category.
        body_query = {
            "size" : 0,
            "query" : base_query,
            "aggregations" : subfacet_aggs,
            "post_filter" : {
                "bool" : bool_filter
            },
            "sort" : sort
        }
        facets_for_category = es.search(index=ES_INDEX, body=body_query)

        facet_names = list(current_subfacets.get(current_category, {}))
        sub_facets[current_category] = recurse_aggs('', facets_for_category, [], facet_names)

        # Second request: the requested page of documents plus a per-type count.
        search_results = es.search(index=ES_INDEX, body={
            "from": results_from,
            "size": RESULTS_SIZE,
            "query": base_query,
            "aggregations": {
                "aggregation": {
                    "terms": {
                        "field": "_type",
                        "exclude": "library", # ignore special type, library, which is used for the Digital Giza Library page
                        "size" : 50 # make sure to get all categories (rather than just 10)
                    }
                }
            },
            "post_filter" : {
                "bool" : bool_filter
            },
            "sort" : sort
        })
        all_categories['types'] = [
            {
                'key' : count['key'],
                'doc_count' : count['doc_count'],
                'display_text' : CATEGORIES[count['key']]
            }
            for count in search_results['aggregations']['aggregation']['buckets']
        ]
        for hit in search_results['hits']['hits']:
            hits.append({'id' : hit.get('_id'), 'type' : hit.get('_type'), 'source' : hit.get('_source')})

        total = search_results['hits']['total']

        # Ceiling division; `//` keeps this an integer on Python 3 as well.
        num_pages = (total // RESULTS_SIZE) + (total % RESULTS_SIZE > 0)
        if num_pages > 0:
            num_pages_range = create_page_ranges(page, num_pages)

        if page > 1:
            has_previous = True
            previous_page_number = page - 1
        if page < num_pages:
            has_next = True
            next_page_number = page + 1

    # combine the current_subfacets into strings for quick comparison in template
    subfacet_strings = []
    if current_category and current_category in current_subfacets:
        for facet_name, facet_values in current_subfacets[current_category].items():
            for value in facet_values:
                subfacet_strings.append("%s_%s_%s" % (current_category, facet_name, value))

    # (parameter name, display label, value) triples describing the search
    search_params = []
    if search_term:
        search_params.append(('q', 'Keyword', search_term))
    if current_category:
        for k, v in fields.items():
            if v:
                search_params.append((current_category+'_'+k, FIELDS_PER_CATEGORY[current_category][k], v))

    return render(request, 'search/results.html', {
        'search_params' : search_params,
        'hits' : hits,
        'all_categories' : all_categories,
        'CATEGORIES' : CATEGORIES,
        'sub_facets' : sub_facets,
        'current_subfacets' : current_subfacets,
        'subfacet_strings' : subfacet_strings,
        'total' : total,
        'has_previous' : has_previous,
        'previous_page_number' : previous_page_number,
        'has_next' : has_next,
        'next_page_number' : next_page_number,
        'num_pages_range' : num_pages_range,
        'num_pages' : num_pages,
        'current_page' : str(page),
        'current_category' : current_category
    })
Esempio n. 4
0
def collection(request, slug):
    """Render the page for one collection (``pages/mygiza-collection.html``).

    Looks up the ``Collection`` with the given ``slug`` (404 when missing),
    builds an Elasticsearch query that matches each of its items by ``_type``
    and ``_id``, and passes the hits and pagination values to the template.

    GET parameters read: ``q`` (only echoed back in ``search_params``, it does
    not restrict the query), ``category``, ``sort`` (default ``_score``) and
    ``page`` (default 1; malformed or non-positive values fall back to 1).
    """
    collection = get_object_or_404(Collection, slug=slug)
    hits = []

    search_term = request.GET.get('q', '')

    # Fetch the items once and reuse them for both the emptiness check and
    # the loop (presumably a Django QuerySet - it was evaluated twice before).
    collection_items = collection.items.all()
    if collection_items:
        query = {
            'bool': {
                "should": [
                    {
                        'bool': {
                            'must': [
                                {
                                    'term': {
                                        "_type": elasticsearch_item.type,
                                    }
                                },
                                {
                                    'term': {
                                        "_id": elasticsearch_item.es_id,
                                    }
                                },
                            ]
                        }
                    }
                    for elasticsearch_item in collection_items
                ],
            }
        }
    else:
        # pass a query that will get no values returned
        query = {
            'ids': {
                'type': '_doc',
                'values': []
            }
        }

    current_category = request.GET.get('category', '')
    current_subfacets = {}
    # No field-specific search is parsed in this view. `fields` used to be
    # undefined, which raised NameError below whenever a category was given.
    fields = {}
    bool_filter = {
        "must" : [],
    }
    sort = request.GET.get('sort', '_score')

    # The page number comes from the query string, so it may be garbage.
    try:
        page = int(request.GET.get('page', 1))
    except (TypeError, ValueError):
        page = 1
    page = max(page, 1)
    # calculate elasticsearch's from, using the page value
    results_from = (page - 1) * RESULTS_SIZE

    all_categories = {}
    sub_facets = {}
    has_next = False
    has_previous = False
    previous_page_number = 0
    next_page_number = 0
    num_pages_range = []
    num_pages = 0
    total = 0

    body_query = {
        "from": results_from,
        "size": RESULTS_SIZE,
        "query": query,
        "aggregations": {
            "aggregation": {
                "terms": {
                    "field": "_type",
                    "exclude": "library", # ignore special type, library, which is used for the Digital Giza Library page
                    "size" : 50 # make sure to get all categories (rather than just 10)
                }
            }
        },
        "post_filter" : {
            "bool" : bool_filter
        },
        "sort" : sort
    }

    # NOTE(review): the returned aggregations are not used by the request
    # below. The call is kept because the helper receives `bool_filter`,
    # which the request bodies share, and may modify it - confirm.
    build_subfacet_aggs(current_category, current_subfacets, bool_filter)
    facets_for_category = es.search(index=ES_INDEX, body=body_query)

    facet_names = list(current_subfacets.get(current_category, {}))
    sub_facets[current_category] = recurse_aggs('', facets_for_category, [], facet_names)

    # Same request as above except for the size.
    # NOTE(review): size 10000 together with a non-zero "from" (page > 1) can
    # exceed Elasticsearch's default result window, and the page count below
    # is still based on RESULTS_SIZE - confirm the intended page size.
    search_results = es.search(index=ES_INDEX, body=dict(body_query, size=10000))
    all_categories['types'] = [
        {
            'key' : count['key'],
            'doc_count' : count['doc_count'],
            'display_text' : CATEGORIES[count['key']]
        }
        for count in search_results['aggregations']['aggregation']['buckets']
    ]
    for hit in search_results['hits']['hits']:
        hits.append({'id' : hit.get('_id'), 'type' : hit.get('_type'), 'source' : hit.get('_source')})

    total = search_results['hits']['total']

    # Ceiling division of total by the page size.
    num_pages = (total // RESULTS_SIZE) + (total % RESULTS_SIZE > 0)
    if num_pages > 0:
        num_pages_range = create_page_ranges(page, num_pages)

    if page > 1:
        has_previous = True
        previous_page_number = page - 1
    if page < num_pages:
        has_next = True
        next_page_number = page + 1

    # combine the current_subfacets into strings for quick comparison in template
    subfacet_strings = []
    if current_category and current_category in current_subfacets:
        for facet_name, facet_values in current_subfacets[current_category].items():
            for value in facet_values:
                subfacet_strings.append("%s_%s_%s" % (current_category, facet_name, value))

    # (parameter name, display label, value) triples describing the search
    search_params = []
    if search_term:
        search_params.append(('q', 'Keyword', search_term))
    if current_category:
        for k, v in fields.items():
            if v:
                search_params.append((current_category+'_'+k, FIELDS_PER_CATEGORY[current_category][k], v))

    return render(request, 'pages/mygiza-collection.html', {
        'collection': collection,
        'search_params' : search_params,
        'hits' : hits,
        'sub_facets' : sub_facets,
        'current_subfacets' : current_subfacets,
        'subfacet_strings' : subfacet_strings,
        'total' : total,
        'has_previous' : has_previous,
        'previous_page_number' : previous_page_number,
        'has_next' : has_next,
        'next_page_number' : next_page_number,
        'num_pages_range' : num_pages_range,
        'num_pages' : num_pages,
    })
Esempio n. 5
0
def search(request):
    """Run a phrase search and render ``search/search.html``.

    GET parameters read:

    * ``q`` - search phrase (default empty). A value containing exactly one
      colon (``<type>:<number>``) is treated as a lookup of one item by
      number, and the part before the colon replaces ``type``.
    * ``type`` - document type to restrict the search to (default empty).
    * ``page`` - 1-based page number; missing, malformed or non-positive
      values fall back to page 1.

    Returns the rendered template, or a redirect to the URL named after the
    type when a number lookup matches exactly one document.
    """
    # Default to '' (not None) so the split below cannot fail when the
    # parameter is missing.
    search_query = request.GET.get('q', '')
    doc_type = request.GET.get('type', '')

    # The page number comes from the query string, so it may be garbage.
    try:
        page = int(request.GET.get('page', 1))
    except (TypeError, ValueError):
        page = 1
    page = max(page, 1)
    # calculate elasticsearch's from, using the page value
    results_from = (page - 1) * RESULTS_SIZE

    # check if user is trying to search by specific item number
    number = None
    parts = search_query.split(':')
    number_query = len(parts) == 2
    if number_query:
        doc_type, number = parts

    # values being passed to template
    hits = []
    facets = {}
    has_next = False
    has_previous = False
    previous_page_number = 0
    next_page_number = 0
    total = 0

    if number_query:
        # user is searching for an exact item using its number
        # such as 'finds:HUMFA_27-5-1'
        search_results = es.search(index=ES_INDEX, doc_type=doc_type, body={
            "query": {
                "term": {
                    "number": number
                }
            }
        })
        results_total = search_results['hits']['total']
        # user entered a number that only has one result for the given type, redirect to that page
        if results_total == 1:
            source = search_results['hits']['hits'][0].get('_source')
            return HttpResponseRedirect(reverse(doc_type, args=(source.get('id'),)))
        # we have 0 or more than 1 result, treat it as a normal search result
        # this shouldn't happen, since we are doing a termed search on number
        # it expects an exact match
        for hit in search_results['hits']['hits']:
            hits.append({'id' : hit.get('_id'), 'type' : hit.get('_type'), 'source' : hit.get('_source')})
    else:
        # this is a normal search, just aggregate by type
        ## ADD aggregations per TYPE
        search_results = es.search(index=ES_INDEX, doc_type=doc_type, body={
            "from": results_from,
            "size": RESULTS_SIZE,
            "query": {
                "match_phrase": {
                    "_all": search_query
                }
            },
            "aggregations": {
                "aggregation": {
                    "terms": {
                        "field": "_type"
                    }
                }
            }
        })
        for count in search_results['aggregations']['aggregation']['buckets']:
            facets[count['key']] = {
                'doc_count' : count['doc_count'],
                'display_text' : RELATED_DISPLAY_TEXT[count['key']]
            }

        for hit in search_results['hits']['hits']:
            hits.append({'id' : hit.get('_id'), 'type' : hit.get('_type'), 'source' : hit.get('_source')})

        total = search_results['hits']['total']

        if page > 1:
            has_previous = True
            previous_page_number = page - 1
        # There is a next page exactly when more documents exist than the
        # pages up to and including this one can hold.
        if total > page * RESULTS_SIZE:
            has_next = True
            next_page_number = page + 1

    return render(request, 'search/search.html', {
        'search_query' : search_query,
        'hits' : hits,
        'facets' : facets,
        'total' : total,
        'has_previous' : has_previous,
        'previous_page_number' : previous_page_number,
        'has_next' : has_next,
        'next_page_number' : next_page_number,
        'type' : doc_type
    })
Esempio n. 6
0
def results(request):
    """Run a site search and render ``search/results.html``.

    GET parameters read by this view:

    * ``q`` - keyword search term. A value containing exactly one colon
      (``<type>:<number>``) is treated as a lookup of one item by number.
    * ``sort`` - passed through unchanged as the Elasticsearch ``sort``
      (default ``_score``).
    * ``category`` - the currently selected category, if any.
    * ``<category>_facet`` - repeatable; each value is ``<subfacet>_<term>``.
    * ``<category>_<field>`` - field-specific search values.
    * ``page`` - 1-based page number; missing, malformed or non-positive
      values fall back to page 1.

    Returns the rendered template, or a redirect to the item's ``intro`` view
    (``get_type_html``) when a number lookup matches exactly one document.
    """
    search_term = request.GET.get('q', '').encode('utf-8')
    sort = request.GET.get('sort', '_score').encode('utf-8')
    current_category = request.GET.get('category', '').encode('utf-8')
    current_subfacets = {}
    fields = {}
    if current_category:
        # check if there are subfacets for the currently selected category
        for sc in request.GET.getlist(current_category+'_facet', []):
            # Split on the first underscore only, so a term that itself
            # contains underscores is kept whole.
            subfacet, separator, term = sc.partition('_')
            if not separator:
                # Malformed value without "<subfacet>_<term>" shape: ignore it
                # rather than failing the whole request.
                continue
            category_facets = current_subfacets.setdefault(current_category, {})
            category_facets.setdefault(subfacet, []).append(term)

        # check if we have a field-specific search
        for key in request.GET:
            if key.startswith(current_category) and not key.endswith('_facet'):
                parts = key.split('_')
                if len(parts) < 2:
                    # Key has no "_<field>" suffix, so there is no field name.
                    continue
                fields[parts[1]] = request.GET.get(key, '').encode('utf-8')

    # The page number comes from the query string, so it may be garbage.
    try:
        page = int(request.GET.get('page', 1))
    except (TypeError, ValueError):
        page = 1
    page = max(page, 1)
    # calculate elasticsearch's from, using the page value
    results_from = (page - 1) * RESULTS_SIZE

    # check if user is trying to search by specific item number
    number = None
    categorystring = ""
    parts = search_term.split(':')
    number_query = len(parts) == 2
    if number_query:
        categorystring, number = parts

    # values being passed to template
    hits = []
    all_categories = {}
    sub_facets = {}
    has_next = False
    has_previous = False
    previous_page_number = 0
    next_page_number = 0
    num_pages_range = []
    num_pages = 0
    total = 0

    if number_query:
        # user is searching for an exact item using its number
        # such as 'objects:HUMFA_27-5-1'
        search_results = es.search(index=ES_INDEX, doc_type=categorystring, body={
            "query": {
                "term": {
                    "number": number
                }
            }
        })
        results_total = search_results['hits']['total']
        # user entered a number that only has one result for the given type, redirect to that page
        if results_total == 1:
            source = search_results['hits']['hits'][0].get('_source')
            return HttpResponseRedirect(reverse('get_type_html', args=(categorystring, source.get('id'), 'intro')))
        # we have 0 or more than 1 result, treat it as a normal search result
        # this shouldn't happen, since we are doing a termed search on number
        # it expects an exact match
        # NOTE(review): `total` and the pagination values keep their zero
        # defaults on this path even when hits are listed - confirm the
        # template copes with that.
        for hit in search_results['hits']['hits']:
            hits.append({'id' : hit.get('_id'), 'type' : hit.get('_type'), 'source' : hit.get('_source')})
    else:
        # this is a normal search
        base_query = build_es_query(search_term, fields)
        bool_filter = build_bool(current_category, current_subfacets, '')
        subfacet_aggs = build_subfacet_aggs(current_category, current_subfacets, bool_filter)

        # First request: no documents (size 0), only the sub-facet
        # aggregations for the selected category.
        body_query = {
            "size" : 0,
            "query" : base_query,
            "aggregations" : subfacet_aggs,
            "post_filter" : {
                "bool" : bool_filter
            },
            "sort" : sort
        }
        facets_for_category = es.search(index=ES_INDEX, body=body_query)

        facet_names = list(current_subfacets.get(current_category, {}))
        sub_facets[current_category] = recurse_aggs('', facets_for_category, [], facet_names)

        # Second request: the requested page of documents plus a per-type count.
        search_results = es.search(index=ES_INDEX, body={
            "from": results_from,
            "size": RESULTS_SIZE,
            "query": base_query,
            "aggregations": {
                "aggregation": {
                    "terms": {
                        "field": "_type"
                    }
                }
            },
            "post_filter" : {
                "bool" : bool_filter
            },
            "sort" : sort
        })
        all_categories['types'] = [
            {
                'key' : count['key'],
                'doc_count' : count['doc_count'],
                'display_text' : CATEGORIES[count['key']]
            }
            for count in search_results['aggregations']['aggregation']['buckets']
        ]
        for hit in search_results['hits']['hits']:
            hits.append({'id' : hit.get('_id'), 'type' : hit.get('_type'), 'source' : hit.get('_source')})

        total = search_results['hits']['total']

        # Ceiling division; `//` keeps this an integer on Python 3 as well.
        num_pages = (total // RESULTS_SIZE) + (total % RESULTS_SIZE > 0)
        if num_pages > 0:
            num_pages_range = create_page_ranges(page, num_pages)

        if page > 1:
            has_previous = True
            previous_page_number = page - 1
        if page < num_pages:
            has_next = True
            next_page_number = page + 1

    # combine the current_subfacets into strings for quick comparison in template
    subfacet_strings = []
    if current_category and current_category in current_subfacets:
        for facet_name, facet_values in current_subfacets[current_category].items():
            for value in facet_values:
                subfacet_strings.append("%s_%s_%s" % (current_category, facet_name, value))

    # (parameter name, display label, value) triples describing the search
    search_params = []
    if search_term:
        search_params.append(('q', 'Keyword', search_term))
    if current_category:
        for k, v in fields.items():
            if v:
                search_params.append((current_category+'_'+k, FIELDS_PER_CATEGORY[current_category][k], v))

    return render(request, 'search/results.html', {
        'search_params' : search_params,
        'hits' : hits,
        'all_categories' : all_categories,
        'CATEGORIES' : CATEGORIES,
        'sub_facets' : sub_facets,
        'current_subfacets' : current_subfacets,
        'subfacet_strings' : subfacet_strings,
        'total' : total,
        'has_previous' : has_previous,
        'previous_page_number' : previous_page_number,
        'has_next' : has_next,
        'next_page_number' : next_page_number,
        'num_pages_range' : num_pages_range,
        'num_pages' : num_pages,
        'current_page' : str(page),
        'current_category' : current_category
    })
Esempio n. 7
0
def library(request):
    """Render ``search/library.html`` grouped according to the ``sort`` parameter.

    * ``sort=name`` (the default): fetches up to 500 ``library`` documents
      sorted by name and groups consecutive documents by the first character
      of their lower-cased name. ``results`` is a list of single-key dicts
      ``{letter: [source, ...]}``; the list always starts with a group for
      ``'a'``, which may be empty.
    * any other value: aggregates ``pubdocs`` on ``<sort>.raw`` and keeps, per
      bucket, the top hits that have a non-empty ``pdf``. ``results`` is a
      list of ``{bucket_key: [{'docs': [...]}]}``; buckets without any such
      document are left out.
    """
    sort_field = request.GET.get('sort', 'name').encode('utf-8')
    grouped = []

    if sort_field != 'name':
        # year, format - TODO: title
        response = es.search(index=ES_INDEX, doc_type='pubdocs', body={
            "size": 0,
            "query": {
                "match_all": {}
            },
            "aggs": {
                "by_sort": {
                    "terms": {
                        "field": sort_field + ".raw",
                        "order": {
                            "_term": "asc"
                        },
                        "size": 500
                    },
                    "aggs": {
                        "by_top_hit": {
                            "top_hits": {
                                "size": 100
                            }
                        }
                    }
                }
            }
        })
        for bucket in response['aggregations']['by_sort']['buckets']:
            bucket_key = bucket['key']
            pdf_docs = []
            for top_hit in bucket['by_top_hit']['hits']['hits']:
                doc = top_hit['_source']
                if 'pdf' in doc and doc['pdf'] != '':
                    pdf_docs.append({
                        'url': doc['pdf'],
                        'displaytext': doc['boilertext'],
                        'format': doc['format']
                    })
            # Buckets with no downloadable document are not shown.
            if pdf_docs:
                grouped.append({bucket_key: [{'docs': pdf_docs}]})
    else:
        response = es.search(index=ES_INDEX, doc_type='library', body={
            "size": 500,
            "sort": sort_field,
            "query": {
                "match_all": {}
            }
        })
        letter = 'a'
        members = []
        # The first group is registered up front, so it is present even when
        # no name starts with 'a'.
        grouped.append({letter: members})
        for entry in response['hits']['hits']:
            doc = entry['_source']
            lowered_name = doc['name'].encode('utf-8').lower()
            if not lowered_name.startswith(letter):
                # A new initial: open a fresh group for it.
                letter = lowered_name[0]
                members = []
                grouped.append({letter: members})
            members.append(doc)

    return render(request, 'search/library.html', {
        'results': grouped,
        'sort': sort_field
    })