def search_terms(request):
    """Return search-box suggestions for the typed text as JSON.

    Reads ``q`` (the text to match, default ``''``) and ``lang`` (default
    ``settings.LANGUAGE_CODE``) from the query string and runs one
    aggregation query against each of the ``terms`` and ``concepts`` indexes.

    Returns:
        A ``JSONResponse`` wrapping a dict with one list per index name. Each
        entry carries the keys ``type``, ``context``, ``context_label``,
        ``id``, ``text`` and ``value``. An index whose search returns
        ``None`` contributes an empty list instead of raising.
    """
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    # Lower-case once; every Match clause below uses the same text.
    search_text = request.GET.get('q', '').lower()
    user_is_reviewer = request.user.groups.filter(name='Resource Reviewer').exists()

    i = 0  # running id, shared across both indexes
    ret = {}
    for index in ['terms', 'concepts']:
        query = Query(se, start=0, limit=0)
        boolquery = Bool()
        boolquery.should(Match(field='value', query=search_text, type='phrase_prefix'))
        boolquery.should(Match(field='value.folded', query=search_text, type='phrase_prefix'))
        boolquery.should(Match(
            field='value.folded',
            query=search_text,
            fuzziness='AUTO',
            prefix_length=settings.SEARCH_TERM_SENSITIVITY,
        ))

        # Users outside the 'Resource Reviewer' group only see terms whose
        # 'provisional' field is 'false'.
        if not user_is_reviewer and index == 'terms':
            boolquery.filter(Terms(field='provisional', terms=['false']))

        query.add_query(boolquery)

        # Bucket by the raw value, best-scoring buckets first.
        base_agg = Aggregation(
            name='value_agg',
            type='terms',
            field='value.raw',
            size=settings.SEARCH_DROPDOWN_LENGTH,
            order={"max_score": "desc"},
        )
        nodegroupid_agg = Aggregation(name='nodegroupid', type='terms', field='nodegroupid')
        top_concept_agg = Aggregation(name='top_concept', type='terms', field='top_concept')
        conceptid_agg = Aggregation(name='conceptid', type='terms', field='conceptid')
        max_score_agg = MaxAgg(name='max_score', script='_score')

        top_concept_agg.add_aggregation(conceptid_agg)
        base_agg.add_aggregation(max_score_agg)
        base_agg.add_aggregation(top_concept_agg)
        base_agg.add_aggregation(nodegroupid_agg)
        query.add_aggregation(base_agg)

        ret[index] = []
        results = query.search(index=index)
        if results is None:
            # No usable response for this index: keep its list empty rather
            # than failing on the subscript below.
            continue

        for result in results['aggregations']['value_agg']['buckets']:
            top_concepts = result['top_concept']['buckets']
            if top_concepts:
                for top_concept in top_concepts:
                    top_concept_id = top_concept['key']
                    top_concept_label = get_preflabel_from_conceptid(top_concept['key'], lang)['value']
                    for concept in top_concept['conceptid']['buckets']:
                        ret[index].append({
                            'type': 'concept',
                            'context': top_concept_id,
                            'context_label': top_concept_label,
                            'id': i,
                            'text': result['key'],
                            'value': concept['key'],
                        })
                    # NOTE(review): the counter advances once per top-concept
                    # bucket, so concepts under the same top concept share an
                    # id - confirm this is intended.
                    i += 1
            else:
                ret[index].append({
                    'type': 'term',
                    'context': '',
                    'context_label': get_resource_model_label(result),
                    'id': i,
                    'text': result['key'],
                    'value': result['key'],
                })
                i += 1

    return JSONResponse(ret)
def search_terms(request):
    """Return search-box suggestions from the ``strings`` index as JSON.

    Reads ``q`` (the text to match, default ``''``) and ``lang`` (default
    ``settings.LANGUAGE_CODE``) from the query string.

    Returns:
        A ``JSONResponse`` wrapping a list of dicts with the keys ``type``,
        ``context``, ``context_label``, ``id``, ``text`` and ``value``. The
        list is empty when the search yields no (or a falsy) response.
    """
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    # Lower-case once; every Match clause below uses the same text.
    search_text = request.GET.get('q', '').lower()

    query = Query(se, start=0, limit=0)
    boolquery = Bool()
    boolquery.should(Match(field='value', query=search_text, type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=search_text, type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=search_text, fuzziness='AUTO'))
    query.add_query(boolquery)

    # Bucket by the raw value, best-scoring buckets first.
    base_agg = Aggregation(
        name='value_agg',
        type='terms',
        field='value.raw',
        size=settings.SEARCH_DROPDOWN_LENGTH,
        order={"max_score": "desc"},
    )
    nodegroupid_agg = Aggregation(name='nodegroupid', type='terms', field='nodegroupid')
    top_concept_agg = Aggregation(name='top_concept', type='terms', field='top_concept')
    conceptid_agg = Aggregation(name='conceptid', type='terms', field='conceptid')
    max_score_agg = MaxAgg(name='max_score', script='_score')

    top_concept_agg.add_aggregation(conceptid_agg)
    base_agg.add_aggregation(max_score_agg)
    base_agg.add_aggregation(top_concept_agg)
    base_agg.add_aggregation(nodegroupid_agg)
    query.add_aggregation(base_agg)

    results = query.search(index='strings')
    if not results:
        # The previous fallback dict only had a 'hits' key, so reading
        # results['aggregations'] raised KeyError whenever it was used.
        return JSONResponse([])

    i = 0
    ret = []
    for result in results['aggregations']['value_agg']['buckets']:
        top_concepts = result['top_concept']['buckets']
        if top_concepts:
            for top_concept in top_concepts:
                top_concept_id = top_concept['key']
                top_concept_label = get_preflabel_from_conceptid(top_concept['key'], lang)['value']
                for concept in top_concept['conceptid']['buckets']:
                    ret.append({
                        'type': 'concept',
                        'context': top_concept_id,
                        'context_label': top_concept_label,
                        'id': i,
                        'text': result['key'],
                        'value': concept['key'],
                    })
                # NOTE(review): the counter advances once per top-concept
                # bucket, so concepts under the same top concept share an
                # id - confirm this is intended.
                i += 1
        else:
            ret.append({
                'type': 'term',
                'context': '',
                'context_label': '',
                'id': i,
                'text': result['key'],
                'value': result['key'],
            })
            i += 1

    return JSONResponse(ret)
def search_terms(request):
    """Build the JSON suggestion lists shown while a user types a search.

    The ``q`` query-string value is matched (lower-cased) against the
    ``terms`` and ``concepts`` indexes; ``lang`` (default
    ``settings.LANGUAGE_CODE``) selects the language of concept labels.

    Returns:
        A ``JSONResponse`` wrapping ``{"terms": [...], "concepts": [...]}``.
        Each entry has the keys ``type``, ``context``, ``context_label``,
        ``id``, ``text`` and ``value``. An index whose search returns
        ``None`` keeps an empty list.
    """
    language = request.GET.get("lang", settings.LANGUAGE_CODE)
    engine = SearchEngineFactory().create()
    needle = request.GET.get("q", "").lower()
    reviewer = user_is_resource_reviewer(request.user)

    next_id = 0  # running id, shared across both indexes
    suggestions = {}
    for index_name in ("terms", "concepts"):
        # Prefix matches on the plain and folded value, plus a fuzzy match.
        clauses = Bool()
        clauses.should(Match(field="value", query=needle, type="phrase_prefix"))
        clauses.should(Match(field="value.folded", query=needle, type="phrase_prefix"))
        clauses.should(Match(
            field="value.folded",
            query=needle,
            fuzziness="AUTO",
            prefix_length=settings.SEARCH_TERM_SENSITIVITY,
        ))
        # Only the terms index is restricted, and only when the reviewer
        # check returned exactly False.
        if index_name == "terms" and reviewer is False:
            clauses.filter(Terms(field="provisional", terms=["false"]))

        query = Query(engine, start=0, limit=0)
        query.add_query(clauses)

        value_agg = Aggregation(
            name="value_agg",
            type="terms",
            field="value.raw",
            size=settings.SEARCH_DROPDOWN_LENGTH,
            order={"max_score": "desc"},
        )
        by_nodegroup = Aggregation(name="nodegroupid", type="terms", field="nodegroupid")
        by_top_concept = Aggregation(name="top_concept", type="terms", field="top_concept")
        by_concept = Aggregation(name="conceptid", type="terms", field="conceptid")
        best_score = MaxAgg(name="max_score", script="_score")

        by_top_concept.add_aggregation(by_concept)
        value_agg.add_aggregation(best_score)
        value_agg.add_aggregation(by_top_concept)
        value_agg.add_aggregation(by_nodegroup)
        query.add_aggregation(value_agg)

        entries = []
        suggestions[index_name] = entries
        response = query.search(index=index_name)
        if response is None:
            continue

        for bucket in response["aggregations"]["value_agg"]["buckets"]:
            top_concepts = bucket["top_concept"]["buckets"]
            if not top_concepts:
                # No concept context: this is a plain term suggestion.
                entries.append({
                    "type": "term",
                    "context": "",
                    "context_label": get_resource_model_label(bucket),
                    "id": next_id,
                    "text": bucket["key"],
                    "value": bucket["key"],
                })
                next_id += 1
                continue

            for top_concept in top_concepts:
                context_id = top_concept["key"]
                context_label = get_preflabel_from_conceptid(top_concept["key"], language)["value"]
                for concept in top_concept["conceptid"]["buckets"]:
                    entries.append({
                        "type": "concept",
                        "context": context_id,
                        "context_label": context_label,
                        "id": next_id,
                        "text": bucket["key"],
                        "value": concept["key"],
                    })
                # NOTE(review): the id advances once per top-concept bucket,
                # so concepts under the same top concept share an id -
                # confirm this is intended.
                next_id += 1

    return JSONResponse(suggestions)
def time_wheel_config(request):
    """Build the time wheel hierarchy (millennium > century > decade) as JSON.

    A first query reads the minimum and maximum ``dates.date`` values from
    the ``resource`` index. Those bounds are rounded outwards to whole
    millennia, and a second query counts resources per millennium, century
    and decade using nested filter aggregations.

    Returns:
        ``JSONResponse`` with the d3 hierarchy rooted at ``root``, or
        ``HttpResponseNotFound`` when either search returns ``None`` or the
        index holds no min/max date.
    """
    se = SearchEngineFactory().create()
    query = Query(se, limit=0)
    nested_agg = NestedAgg(path='dates', name='min_max_agg')
    nested_agg.add_aggregation(MinAgg(field='dates.date'))
    nested_agg.add_aggregation(MaxAgg(field='dates.date'))
    query.add_aggregation(nested_agg)
    results = query.search(index='resource')

    if results is None:
        return HttpResponseNotFound(_('Error retrieving the time wheel config'))
    min_max = results['aggregations']['min_max_agg']
    if (min_max['min_dates.date']['value'] is None
            or min_max['max_dates.date']['value'] is None):
        return HttpResponseNotFound(_('Error retrieving the time wheel config'))

    # NOTE(review): the division by 10000 presumably turns a YYYYMMDD-style
    # integer into a year - confirm against the indexing code.
    min_date = int(min_max['min_dates.date']['value']) / 10000
    max_date = int(min_max['max_dates.date']['value']) / 10000

    # round min and max date to the nearest 1000 years
    min_date = math.ceil(math.fabs(min_date) / 1000) * -1000 if min_date < 0 else math.floor(min_date / 1000) * 1000
    max_date = math.floor(math.fabs(max_date) / 1000) * -1000 if max_date < 0 else math.ceil(max_date / 1000) * 1000

    query = Query(se, limit=0)
    range_lookup = {}

    def gen_range_agg(gte=None, lte=None, permitted_nodegroups=None):
        """Return a Bool query matching dates or date ranges in [gte, lte]."""
        date_query = Bool()
        date_query.filter(Range(field='dates.date', gte=gte, lte=lte, relation='intersects'))
        # Compare against None, not truthiness: an empty permission list must
        # still add the filter (a terms filter with no values is expected to
        # match nothing) instead of silently removing the restriction.
        if permitted_nodegroups is not None:
            date_query.filter(Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))

        date_ranges_query = Bool()
        date_ranges_query.filter(
            Range(field='date_ranges.date_range', gte=gte, lte=lte, relation='intersects'))
        if permitted_nodegroups is not None:
            date_ranges_query.filter(
                Terms(field='date_ranges.nodegroup_id', terms=permitted_nodegroups))

        wrapper_query = Bool()
        wrapper_query.should(Nested(path='date_ranges', query=date_ranges_query))
        wrapper_query.should(Nested(path='dates', query=date_query))
        return wrapper_query

    def make_period_agg(label, low, high, nodegroups=None):
        """Create the FiltersAgg for one period and record its year range."""
        period_name = "%s (%s - %s)" % (label, low, high)
        period_agg = FiltersAgg(name=period_name)
        period_agg.add_filter(gen_range_agg(
            gte=ExtendedDateFormat(low).lower,
            lte=ExtendedDateFormat(high).lower,
            permitted_nodegroups=nodegroups))
        range_lookup[period_name] = [low, high]
        return period_agg

    # The permission lookup does not depend on the period; do it once.
    permitted_nodegroups = get_permitted_nodegroups(request.user)

    for millennium in range(int(min_date), int(max_date) + 1000, 1000):
        # Only the millennium level carries the nodegroup restriction; the
        # nested century/decade aggregations are built without it.
        millenium_agg = make_period_agg(
            'Millennium', millennium, millennium + 1000, permitted_nodegroups)
        for century in range(millennium, millennium + 1000, 100):
            century_agg = make_period_agg('Century', century, century + 100)
            millenium_agg.add_aggregation(century_agg)
            for decade in range(century, century + 100, 10):
                decade_agg = make_period_agg('Decade', decade, decade + 10)
                century_agg.add_aggregation(decade_agg)
        query.add_aggregation(millenium_agg)

    agg_results = query.search(index='resource')
    if agg_results is None:
        # The search can return None (see the first query); report it with
        # the same response instead of failing on the subscript below.
        return HttpResponseNotFound(_('Error retrieving the time wheel config'))

    root = d3Item(name='root')
    results = {'buckets': [agg_results['aggregations']]}
    results_with_ranges = appendDateRanges(results, range_lookup)
    transformESAggToD3Hierarchy(results_with_ranges, root)
    return JSONResponse(root, indent=4)
def time_wheel_config(self, user):
    """Build the time wheel hierarchy for ``user`` from the resources index.

    A first query reads the minimum and maximum ``dates.date`` values. Those
    bounds are rounded outwards to whole millennia and a second query counts
    resources per period, using a configurable tree of date tiers:

    * ``settings.TIMEWHEEL_DATE_TIERS`` when it is not ``None``;
    * otherwise Millennium > Half-millennium > Century when the rounded
      bounds are more than 1000 years apart;
    * otherwise Millennium > Century > Decade.

    Each tier is a dict with ``name`` and ``interval``, optionally ``child``
    (the next tier), ``root`` (marks the top tier) and ``range`` (``min`` and
    ``max`` bounds a period must fall within to be attached to its parent).

    Args:
        user: The requesting user; used for the permitted-nodegroup filter
            and for the per-user cache lookup in ``settings.CACHE_BY_USER``.

    Returns:
        The populated ``d3Item`` root, or ``None`` when either search returns
        ``None`` or the index holds no min/max date.
    """
    se = SearchEngineFactory().create()
    query = Query(se, limit=0)
    nested_agg = NestedAgg(path="dates", name="min_max_agg")
    nested_agg.add_aggregation(MinAgg(field="dates.date"))
    nested_agg.add_aggregation(MaxAgg(field="dates.date"))
    query.add_aggregation(nested_agg)
    results = query.search(index=RESOURCES_INDEX)

    if results is None:
        return None
    min_max = results["aggregations"]["min_max_agg"]
    if (min_max["min_dates.date"]["value"] is None
            or min_max["max_dates.date"]["value"] is None):
        return None

    # NOTE(review): the division by 10000 presumably turns a YYYYMMDD-style
    # integer into a year - confirm against the indexing code.
    min_date = int(min_max["min_dates.date"]["value"]) / 10000
    max_date = int(min_max["max_dates.date"]["value"]) / 10000

    # round min and max date to the nearest 1000 years
    min_date = math.ceil(math.fabs(min_date) / 1000) * -1000 if min_date < 0 else math.floor(min_date / 1000) * 1000
    max_date = math.floor(math.fabs(max_date) / 1000) * -1000 if max_date < 0 else math.ceil(max_date / 1000) * 1000

    query = Query(se, limit=0)
    range_lookup = {}

    def gen_range_agg(gte=None, lte=None, permitted_nodegroups=None):
        """Return a Bool query matching dates or date ranges in [gte, lte]."""
        date_query = Bool()
        date_query.filter(Range(field="dates.date", gte=gte, lte=lte, relation="intersects"))
        if permitted_nodegroups is not None:
            date_query.filter(Terms(field="dates.nodegroup_id", terms=permitted_nodegroups))

        date_ranges_query = Bool()
        date_ranges_query.filter(
            Range(field="date_ranges.date_range", gte=gte, lte=lte, relation="intersects"))
        if permitted_nodegroups is not None:
            date_ranges_query.filter(
                Terms(field="date_ranges.nodegroup_id", terms=permitted_nodegroups))

        wrapper_query = Bool()
        wrapper_query.should(Nested(path="date_ranges", query=date_ranges_query))
        wrapper_query.should(Nested(path="dates", query=date_query))
        return wrapper_query

    date_tiers = {
        "name": "Millennium",
        "interval": 1000,
        "root": True,
        "child": {
            "name": "Century",
            "interval": 100,
            "child": {"name": "Decade", "interval": 10},
        },
    }

    # Use coarser tiers when the rounded bounds span more than a millennium.
    if abs(int(min_date) - int(max_date)) > 1000:
        date_tiers = {
            "name": "Millennium",
            "interval": 1000,
            "root": True,
            "child": {
                "name": "Half-millennium",
                "interval": 500,
                "child": {"name": "Century", "interval": 100},
            },
        }

    if settings.TIMEWHEEL_DATE_TIERS is not None:
        date_tiers = settings.TIMEWHEEL_DATE_TIERS

    # The permission lookup does not depend on the period; do it once
    # instead of once per root-tier period.
    root_nodegroups = self.get_permitted_nodegroups(user)

    def add_date_tier(date_tier, low_date, high_date, previous_period_agg=None):
        """Add one aggregation per period of ``date_tier``, then recurse."""
        interval = date_tier["interval"]
        name = date_tier["name"]
        is_root = "root" in date_tier
        within_range = True

        if is_root:
            # Extend the upper bound so the final period is included.
            high_date = int(high_date) + interval

        for period in range(int(low_date), int(high_date), interval):
            min_period = period
            max_period = period + interval
            if "range" in date_tier:
                within_range = (min_period >= date_tier["range"]["min"]
                                and max_period <= date_tier["range"]["max"])

            period_name = "{0} ({1} - {2})".format(name, min_period, max_period)
            # Only root-tier periods carry the nodegroup restriction.
            nodegroups = root_nodegroups if is_root else None
            period_boolquery = gen_range_agg(
                gte=ExtendedDateFormat(min_period).lower,
                lte=ExtendedDateFormat(max_period).lower,
                permitted_nodegroups=nodegroups,
            )
            period_agg = FiltersAgg(name=period_name)
            period_agg.add_filter(period_boolquery)

            if not is_root and within_range:
                previous_period_agg.add_aggregation(period_agg)

            range_lookup[period_name] = [min_period, max_period]

            if "child" in date_tier:
                add_date_tier(date_tier["child"], min_period, max_period, period_agg)

            if is_root:
                query.add_aggregation(period_agg)

    add_date_tier(date_tiers, min_date, max_date)

    agg_results = query.search(index=RESOURCES_INDEX)
    if agg_results is None:
        # The search can return None (see the first query); bail out the same
        # way instead of failing on the subscript below. Nothing is cached.
        return None

    root = d3Item(name="root")
    results = {"buckets": [agg_results["aggregations"]]}
    results_with_ranges = self.appendDateRanges(results, range_lookup)
    self.transformESAggToD3Hierarchy(results_with_ranges, root)

    # calculate total number of docs
    for child in root.children:
        root.size = root.size + child.size

    # Cache per user when a timeout is configured for this username.
    if user.username in settings.CACHE_BY_USER:
        key = "time_wheel_config_{0}".format(user.username)
        cache.set(key, root, settings.CACHE_BY_USER[user.username])

    return root
def time_wheel_config(request):
    """Build the time wheel hierarchy (millennium > century > decade) as JSON.

    A first query reads the minimum and maximum of the ``dates`` field,
    formatted as years, from the ``resource`` index. Those bounds are rounded
    outwards to whole millennia and a second query counts resources per
    millennium, century and decade with nested date-range aggregations.

    Returns:
        ``JSONResponse`` with the d3 hierarchy rooted at ``root``, or
        ``HttpResponseNotFound`` when either search returns ``None`` or the
        index holds no min/max date.
    """
    se = SearchEngineFactory().create()
    query = Query(se, limit=0)
    query.add_aggregation(MinAgg(field='dates', format='y'))
    query.add_aggregation(MaxAgg(field='dates', format='y'))
    results = query.search(index='resource')

    if results is None:
        return HttpResponseNotFound(_('Error retrieving the time wheel config'))
    aggregations = results['aggregations']
    if (aggregations['min_dates']['value'] is None
            or aggregations['max_dates']['value'] is None):
        return HttpResponseNotFound(_('Error retrieving the time wheel config'))

    # 'value_as_string' holds the value rendered with the 'y' format
    # requested above, so it parses directly as a year.
    min_date = int(aggregations['min_dates']['value_as_string'])
    max_date = int(aggregations['max_dates']['value_as_string'])

    # round min and max date to the nearest 1000 years
    min_date = math.ceil(math.fabs(min_date) / 1000) * -1000 if min_date < 0 else math.floor(min_date / 1000) * 1000
    max_date = math.floor(math.fabs(max_date) / 1000) * -1000 if max_date < 0 else math.ceil(max_date / 1000) * 1000

    def year_range_agg(label, low, high):
        """Return a DateRangeAgg over ``dates`` for the years [low, high]."""
        return DateRangeAgg(
            name="%s (%s-%s)" % (label, low, high),
            field='dates',
            format='y',
            min_date=str(low),
            max_date=str(high),
        )

    query = Query(se, limit=0)
    for millennium in range(int(min_date), int(max_date) + 1000, 1000):
        millenium_agg = year_range_agg('Millennium', millennium, millennium + 1000)
        for century in range(millennium, millennium + 1000, 100):
            century_aggregation = year_range_agg('Century', century, century + 100)
            millenium_agg.add_aggregation(century_aggregation)
            for decade in range(century, century + 100, 10):
                decade_aggregation = year_range_agg('Decade', decade, decade + 10)
                century_aggregation.add_aggregation(decade_aggregation)
        query.add_aggregation(millenium_agg)

    agg_results = query.search(index='resource')
    if agg_results is None:
        # The search can return None (see the first query); report it with
        # the same response instead of failing on the subscript below.
        return HttpResponseNotFound(_('Error retrieving the time wheel config'))

    root = d3Item(name='root')
    transformESAggToD3Hierarchy({'buckets': [agg_results['aggregations']]}, root)
    return JSONResponse(root, indent=4)