def get_queryset_posts(self):
    """Build the scored query used to search forum posts.

    A post is selected only if it is visible, belongs to one of
    ``self.authorized_forums`` and its ``text_html`` matches
    ``self.search_query``.

    A configured weight (``settings.ZDS_APP['search']['boosts']['post']``)
    is applied, with ``boost_mode='multiply'``, when the post:

    + is the first one in its topic (``position`` is 1);
    + is marked as "useful";
    + has a like/dislike ratio strictly above 1 (more likes than dislikes)
      or strictly below 1 (the other way around).

    :return: the ``FunctionScore`` query wrapping the filter and the boosts.
    """
    is_post = Match(_type='post')
    in_allowed_forum = Terms(forum_pk=self.authorized_forums)
    is_visible = Term(is_visible=True)
    matches_text = MultiMatch(query=self.search_query, fields=['text_html'])
    query = is_post & in_allowed_forum & is_visible & matches_text

    post_boosts = settings.ZDS_APP['search']['boosts']['post']
    functions_score = [
        {'filter': Match(position=1), 'weight': post_boosts['if_first']},
        {'filter': Match(is_useful=True), 'weight': post_boosts['if_useful']},
        {
            'filter': Range(like_dislike_ratio={'gt': 1}),
            'weight': post_boosts['ld_ratio_above_1'],
        },
        {
            'filter': Range(like_dislike_ratio={'lt': 1}),
            'weight': post_boosts['ld_ratio_below_1'],
        },
    ]

    return FunctionScore(
        query=query, boost_mode='multiply', functions=functions_score)
def create_enum_range_min_max_filter(field, query_term):
    """Build an ElasticSearch filter over a pair of min/max enum fields.

    Two fields such as `climbing_rating_min` and `climbing_rating_max` are
    exposed as one search field: searching for `crat=4c,6b` returns the
    documents whose min/max ratings overlap the requested range.

    `query_term` is a comma-separated list of enum names. Names that cannot
    be mapped to an integer are dropped; unless exactly two values remain,
    `None` is returned.
    """
    enum_mapper = field._enum_mapper
    mapped = [
        map_enum_to_int(enum_mapper, term) for term in query_term.split(',')
    ]
    bounds = [value for value in mapped if value is not None]
    if len(bounds) != 2:
        return None
    lower, upper = bounds

    # A document is excluded when its interval lies entirely above or
    # entirely below the requested range, or when both fields are missing.
    starts_above = Range(**{field.field_min: {'gt': upper}})
    ends_below = Range(**{field.field_max: {'lt': lower}})
    both_missing = Bool(must=[
        Missing(field=field.field_min),
        Missing(field=field.field_max),
    ])
    return Bool(must_not=Bool(should=[starts_above, ends_below, both_missing]))
def inner(values):
    """Return a query for periods that overlap the requested date range.

    ``values[0]`` must contain exactly one ``--`` separating the start and
    end of the range. A document matches when its ``closing_date`` or its
    ``opening_date`` falls inside the range (bounds included), or when its
    opening/closing period strictly contains the whole range.
    """
    range_start, range_end = values[0].split("--")

    closes_inside = Range(
        closing_date={"gte": range_start, "lte": range_end})
    opens_inside = Range(
        opening_date={"gte": range_start, "lte": range_end})
    spans_whole_range = Q(
        "bool",
        must=[
            Range(opening_date={"lt": range_start}),
            Range(closing_date={"gt": range_end}),
        ],
    )

    return Q(
        "bool",
        should=[closes_inside, opens_inside, spans_whole_range],
    )
def get_queryset_posts(self):
    """Return the boosted search query for forum posts.

    Posts are kept only when they are visible, live in a forum listed in
    ``self.authorized_forums`` and match ``self.search_query`` on their
    ``text_html`` field.

    The score is then modified (``boost_mode="multiply"``) with the weights
    configured under ``settings.ZDS_APP["search"]["boosts"]["post"]`` if:

    + the post is the first one in a topic;
    + the post is marked as "useful";
    + the post has a like/dislike ratio above 1 (more likes than dislikes)
      or below 1 (the other way around).
    """
    visible_in_allowed_forum = (
        Match(_type="post")
        & Terms(forum_pk=self.authorized_forums)
        & Term(is_visible=True)
    )
    query = visible_in_allowed_forum & MultiMatch(
        query=self.search_query, fields=["text_html"])

    boosts = settings.ZDS_APP["search"]["boosts"]["post"]
    # Each entry pairs a filter with the name of the boost applied to it.
    boosted_filters = (
        (Match(position=1), "if_first"),
        (Match(is_useful=True), "if_useful"),
        (Range(like_dislike_ratio={"gt": 1}), "ld_ratio_above_1"),
        (Range(like_dislike_ratio={"lt": 1}), "ld_ratio_below_1"),
    )
    functions_score = [
        {"filter": boosted_filter, "weight": boosts[boost_name]}
        for boosted_filter, boost_name in boosted_filters
    ]

    return FunctionScore(
        query=query, boost_mode="multiply", functions=functions_score)
def test_create_filter_date(self):
    """`idate` terms give a single-field date Range, or None without a date."""
    for term in ('', 'invalid date'):
        self.assertEqual(create_filter('idate', term, SearchImage), None)

    # (query term, expected lower bound, expected upper bound)
    cases = (
        ('2016-01-01', '2016-01-01', '2016-01-01'),
        ('2016-01-01,invalid date', '2016-01-01', '2016-01-01'),
        ('2016-01-01,2016-01-01', '2016-01-01', '2016-01-01'),
        ('2016-01-01,2016-01-03', '2016-01-01', '2016-01-03'),
    )
    for term, first_day, last_day in cases:
        self.assertEqual(
            create_filter('idate', term, SearchImage),
            Range(date_time={'gte': first_day, 'lte': last_day}))
def create_date_range_filter(field, query_term):
    """Build an ElasticSearch filter over a start-date/end-date field pair.

    This filter type is currently only used for Outing.date_start/date_end.

    Valid query terms are:
        2016-01-01
        2016-01-01,2016-01-01
        2016-01-01,2016-01-03

    Terms that `parse_date` rejects (returns `None` for) are ignored; when
    no date is left, `None` is returned.
    """
    dates = [
        day for day in map(parse_date, query_term.split(','))
        if day is not None
    ]
    if not dates:
        return None

    first = dates[0]
    if len(dates) == 1 or first == dates[1]:
        # Single date: the document period must contain that day.
        starts_on_or_before = Range(**{field.field_date_start: {'lte': first}})
        ends_on_or_after = Range(**{field.field_date_end: {'gte': first}})
        return Bool(must=[starts_on_or_before, ends_on_or_after])

    # Date range: exclude documents that start after the range or end
    # before it.
    last = dates[1]
    starts_after = Range(**{field.field_date_start: {'gt': last}})
    ends_before = Range(**{field.field_date_end: {'lt': first}})
    return Bool(must_not=Bool(should=[starts_after, ends_before]))
def get_filters(data):
    """Translate request parameters into a list of ElasticSearch filters.

    Only keys present in ``data`` contribute a filter:

    * ``price_min`` / ``price_max`` and ``area_min`` / ``area_max`` become
      ``Range`` bounds on ``price`` and ``area``;
    * ``rooms`` and ``balcony_type`` are comma-separated lists turned into
      ``Terms`` filters;
    * ``city`` becomes an exact ``Term`` filter;
    * truthy ``mortgage`` / ``army_mortgage`` add a ``Term`` filter on the
      field of the same name.

    Returns the filters in the order listed above (an empty list when
    nothing applies).
    """
    filters = []

    # (request key, document field, range operator)
    bound_specs = (
        ('price_min', 'price', 'gte'),
        ('price_max', 'price', 'lte'),
        ('area_min', 'area', 'gte'),
        ('area_max', 'area', 'lte'),
    )
    for key, doc_field, operator in bound_specs:
        if key in data:
            filters.append(Range(**{doc_field: {operator: data[key]}}))

    if 'rooms' in data:
        filters.append(Terms(rooms=data['rooms'].split(',')))
    if 'city' in data:
        filters.append(Term(city=data['city']))
    if 'balcony_type' in data:
        filters.append(Terms(balcony_type=data['balcony_type'].split(',')))

    # NOTE(review): both flags filter on the value False when the flag is
    # set -- confirm this is the intended meaning of the indexed fields.
    if data.get('mortgage', False):
        filters.append(Term(mortgage=False))
    if data.get('army_mortgage', False):
        filters.append(Term(army_mortgage=False))

    return filters
def test_create_filter_integer_range(self):
    """Integer min/max range terms need a known field and two numbers."""
    rejected = (
        ('not a valid field', '1200,2400'),
        ('ele', ''),
        ('ele', 'invalid term'),
        ('ele', '1200'),
        ('ele', '1200,invalid term'),
        ('ele', 'invalid term,2400'),
    )
    for name, term in rejected:
        self.assertEqual(create_filter(name, term, SearchRoute), None)

    def expected_filter(min_field, max_field):
        # Excludes documents entirely above/below 1200..2400 and documents
        # where both fields are missing.
        return Bool(must_not=Bool(should=[
            Range(**{min_field: {'gt': 2400}}),
            Range(**{max_field: {'lt': 1200}}),
            Bool(must=[
                Missing(field=min_field),
                Missing(field=max_field),
            ]),
        ]))

    self.assertEqual(
        create_filter('ele', '1200,2400', SearchRoute),
        expected_filter('elevation_min', 'elevation_max'))
    self.assertEqual(
        create_filter('height', '1200,2400', SearchWaypoint),
        expected_filter('height_min', 'height_max'))
def test_keyed_range_filter():
    """Check that keyed range filters map keys to bounds and merge them."""
    bounds_by_key = {"None": {"lt": 1}, "1+": {"gte": 1}}
    rfilter = keyed_range_filter("field", bounds_by_key)

    # A single key yields exactly the bounds registered for it.
    for key, bounds in (("None", {"lt": 1}), ("1+", {"gte": 1})):
        assert rfilter([key]) == Range(field=bounds)

    # Several keys are merged into one Range.
    merged = rfilter(["None", "1+"])
    assert merged == Range(field={"gte": 1, "lt": 1})
def _get_query_for_range(self, sources, lookup, value):
    """OR together one ``Range`` clause per source field.

    Every field in ``sources`` gets the clause ``{lookup: value}``.
    Returns ``None`` when ``sources`` is empty.
    """
    # Start from None rather than Q(): per the original note, an empty Q()
    # as the default would make the result MatchAll() every time.
    combined = None
    for source in sources:
        clause = Range(**{source: {lookup: value}})
        combined = clause if combined is None else combined | clause
    return combined
def test_date_range_filter(app, input_date):
    """Test date range filter date validation and query.

    For an accepted ``input_date`` both filters must produce the matching
    ``Range`` query; otherwise both must reject it with ``ValueError``.
    """
    from_filter = date_range_filter("field", "gte")
    to_filter = date_range_filter("field", "lte")

    try:
        assert from_filter([input_date]) == Range(field={"gte": input_date})
        assert to_filter([input_date]) == Range(field={"lte": input_date})
    except (ValueError, AssertionError):
        # Only these two outcomes mean "input not accepted as-is"; anything
        # else is a real error and must surface.
        # Each filter gets its own context: inside a shared one the second
        # call would never run once the first had raised.
        with pytest.raises(ValueError):
            from_filter([input_date])
        with pytest.raises(ValueError):
            to_filter([input_date])
def test_create_filter_date_range(self):
    """`date` terms give a start/end overlap filter, or None without a date."""
    for term in ('', 'invalid date'):
        self.assertEqual(create_filter('date', term, SearchOuting), None)

    # Any term that boils down to one day gives the same "contains" filter.
    single_day_terms = (
        '2016-01-01',
        '2016-01-01,invalid date',
        '2016-01-01,2016-01-01',
    )
    for term in single_day_terms:
        self.assertEqual(
            create_filter('date', term, SearchOuting),
            Bool(must=[
                Range(date_start={'lte': '2016-01-01'}),
                Range(date_end={'gte': '2016-01-01'}),
            ]))

    # Two distinct days give the "does not lie outside the range" filter.
    self.assertEqual(
        create_filter('date', '2016-01-01,2016-01-03', SearchOuting),
        Bool(must_not=Bool(should=[
            Range(date_start={'gt': '2016-01-03'}),
            Range(date_end={'lt': '2016-01-01'}),
        ])))
def test_current_ranged_loans_filter(app):
    """Check the queries built for the "Overdue" and "Upcoming return" keys."""
    with app.app_context():
        rfilter = overdue_loans_filter("field")
        active_states = current_app.config["CIRCULATION_STATES_LOAN_ACTIVE"]
        current_loans_query = Terms(state=active_states)

        # Overdue: the field is before today, for active loans only.
        expected_overdue = Range(
            field={"lt": str(arrow.utcnow().date())}) & current_loans_query
        assert rfilter(["Overdue"]) == expected_overdue

        # Upcoming return: between today and seven days from now, for
        # active loans only.
        expected_upcoming = Range(
            field={
                "gte": str(arrow.utcnow().date()),
                "lte": str((arrow.utcnow() + timedelta(days=7)).date()),
            }) & current_loans_query
        assert rfilter(["Upcoming return"]) == expected_upcoming
def create_date_filter(field, query_term):
    """Build an ElasticSearch date-range filter on a single date field.

    This filter type is currently only used for Image.date_time.

    Valid query terms are:
        2016-01-01
        2016-01-01,2016-01-01
        2016-01-01,2016-01-03

    Terms that `parse_date` rejects (returns `None` for) are ignored. With
    no date left `None` is returned; with one date it serves as both ends
    of the range.
    """
    dates = [
        day for day in map(parse_date, query_term.split(','))
        if day is not None
    ]
    if not dates:
        return None

    first_day = dates[0]
    last_day = dates[1] if len(dates) > 1 else first_day
    return Range(**{field._field_date: {'gte': first_day, 'lte': last_day}})
def inner(values):
    """Return a query matching any of the requested ranges.

    Each item of ``values`` is split on ``--`` into a start and an end.
    An empty end is left unbounded. A start prefixed with ``>`` or an end
    prefixed with ``<`` is exclusive; otherwise the bound is inclusive.
    """
    # (strictness marker, exclusive operator, inclusive operator); the first
    # entry applies to the start of a range, the second to its end.
    bound_specs = (('>', 'gt', 'gte'), ('<', 'lt', 'lte'))

    clauses = []
    for raw_range in values:
        bounds = {}
        endpoints = raw_range.split('--')
        for endpoint, (marker, exclusive, inclusive) in zip(endpoints,
                                                            bound_specs):
            if not endpoint:
                continue
            if endpoint[0] == marker:
                # Drop the marker and use the exclusive operator.
                bounds[exclusive] = endpoint[1:]
            else:
                bounds[inclusive] = endpoint
        clauses.append(Range(**{field: bounds}))

    return Bool(should=clauses)
def etl(index='cf_rfem_hist_price',
        start_date='2018-12-26',
        end_date='2019-03-25',
        symbol='rfem'):
    """Extract price-change features for one symbol over a date range.

    Queries ``index`` for documents whose ``date`` lies between
    ``start_date`` and ``end_date`` (both included) and whose ``symbol``
    equals ``symbol``. At most the first 100 hits are requested.

    Args:
        index: name of the index to search.
        start_date: inclusive lower bound for the ``date`` field.
        end_date: inclusive upper bound for the ``date`` field.
        symbol: value the ``symbol`` field must have.

    Returns:
        A list with one ``[changeOverTime, changePercent, volume]`` row per
        hit, in the order the hits are returned.
    """
    ESLowLevelClientByConnection.get_instance()

    search = Search(index=index, using='high_level_client')[0:100]
    # Use the caller's arguments; the defaults reproduce the values that
    # were previously hard-coded here.
    search.query = Q(
        Bool(must=[
            Range(date={'gte': start_date, 'lte': end_date}),
            Term(symbol=symbol),
        ]))

    response = search.execute()
    return [
        [
            hit['_source']['changeOverTime'],
            hit['_source']['changePercent'],
            hit['_source']['volume'],
        ]
        for hit in response['hits']['hits']
    ]
def create_range_filter(field, query_term):
    """Creates an ElasticSearch range filter.

    E.g. the call `create_range_filter(elevation_field, '1500,2500')`
    creates the following filter:
        {'range': {'elevation': {'gte': 1500, 'lte': 2500}}}

    `query_term` is "from,to"; either side may be left out. A side that
    `parse_num` cannot parse (returns `None` for) or that is NaN sets no
    bound. Returns `None` when no usable bound remains, so that no
    bound-less range filter is ever produced.
    """
    range_values = [parse_num(term) for term in query_term.split(',')]

    n = len(range_values)
    range_from = range_values[0] if n > 0 else None
    range_to = range_values[1] if n > 1 else None

    if range_from is None and range_to is None:
        return None

    range_params = {}
    if range_from is not None and not math.isnan(range_from):
        range_params['gte'] = range_from
    if range_to is not None and not math.isnan(range_to):
        range_params['lte'] = range_to

    if not range_params:
        # Only NaN values were given (e.g. 'NaN' or 'NaN,NaN').
        return None

    return Range(**{field._name: range_params})
def get_value_filter(self, filter_value):
    """Build a ``Range`` filter from a "from-to" or single-number string.

    Accepted forms of ``filter_value``:

    * ``"5-10"``: both bounds, inclusive;
    * ``"5-"`` or ``"-10"``: only a lower or only an upper bound;
    * ``"7"``: both bounds equal to that number (``"0"`` included).

    If any part is not an integer, no bound at all is applied.

    Returns:
        A ``Range`` on ``self._params['field']`` carrying the ``gte`` /
        ``lte`` bounds that could be parsed.
    """
    lower = upper = None
    try:
        if '-' in filter_value:
            raw_lower, raw_upper = filter_value.split('-', 1)
            # An empty side means "unbounded" on that side.
            lower = int(raw_lower) if raw_lower else None
            upper = int(raw_upper) if raw_upper else None
        else:
            # Keep the parsed number as-is: testing its truthiness would
            # wrongly discard a legitimate 0.
            lower = upper = int(filter_value)
    except ValueError:
        # One invalid part invalidates the whole expression.
        lower = upper = None

    limits = {}
    if lower is not None:
        limits['gte'] = lower
    if upper is not None:
        limits['lte'] = upper

    return Range(**{self._params['field']: limits})
def getUSWDSquery(indexbase, query, version, agency, domaintype, sort):
    """Build the search over the ``<indexbase>-uswds2`` index.

    Args:
        indexbase: index name prefix; ``-uswds2`` is appended to it.
        query: minimum ``data.total_score``; anything that cannot be
            converted to an integer is treated as 0.
        version: ``'all versions'`` for no version filter,
            ``'detected versions'`` to match versions starting with ``v``,
            or an exact version string.
        agency: agency name, or ``'All Agencies'`` for no filter.
        domaintype: domain type, or ``'All Branches'`` for no filter.
        sort: ``'Score'`` sorts by descending total score; any other value
            sorts by domain.

    Returns:
        The configured, not yet executed, ``Search`` object.
    """
    index = indexbase + '-uswds2'

    try:
        min_score = int(query)
    except (TypeError, ValueError, OverflowError):
        # Missing or non-numeric threshold: do not filter on the score.
        min_score = 0

    s = Search(using=es, index=index)
    if sort == 'Score':
        s = s.sort('-data.total_score')
    else:
        s = s.sort('domain')

    s = s.query(Bool(should=[Range(data__total_score={'gte': min_score})]))

    if version != 'all versions':
        if version == 'detected versions':
            s = s.query("query_string", query='v*',
                        fields=['data.uswdsversion'])
        else:
            # Quote the value so that it is matched as a phrase.
            versionquery = '"' + version + '"'
            s = s.query("query_string", query=versionquery,
                        fields=['data.uswdsversion'])

    if agency != 'All Agencies':
        agencyquery = '"' + agency + '"'
        s = s.query("query_string", query=agencyquery, fields=['agency'])

    if domaintype != 'All Branches':
        domaintypequery = '"' + domaintype + '"'
        s = s.query("query_string", query=domaintypequery,
                    fields=['domaintype'])

    return s
def build_range_query(field, operator, value):
    """Create a 'Range' condition for Elasticsearch.

    Args:
        field (str): Field the condition applies to.
        operator (str): Comparison operator to use (<, <=, >, >=).
        value (int): Number the field should be compared against.

    Returns:
        Query: Range condition for Elasticsearch.

    Raises:
        ValueError: If `operator` is not one of the supported operators.
    """
    # Comparison symbol -> Elasticsearch range keyword.
    known_operators = (
        ('<', 'lt'),
        ('<=', 'lte'),
        ('>', 'gt'),
        ('>=', 'gte'),
    )
    for symbol, keyword in known_operators:
        if operator == symbol:
            es_operator = keyword
            break
    else:
        raise ValueError('Invalid operator.')

    return Range(**{field: {es_operator: value}})
def create_enum_range_filter(field, query_term):
    """Build an ElasticSearch range filter for an enum field.

    E.g. the call `create_enum_range_filter(quality, 'medium,great')`
    creates the following filter:
        {'range': {'quality': {'gte': 2, 'lte': 4}}}

    `query_term` is "from,to" in enum names. A name that cannot be mapped
    to an integer sets no bound; `None` is returned when neither bound can
    be set.
    """
    enum_mapper = field._enum_mapper
    mapped = [
        map_enum_to_int(enum_mapper, term) for term in query_term.split(',')
    ]
    lower = mapped[0] if mapped else None
    upper = mapped[1] if len(mapped) > 1 else None

    if lower is None and upper is None:
        return None

    range_params = {
        operator: bound
        for operator, bound in (('gte', lower), ('lte', upper))
        if bound is not None
    }
    return Range(**{field._name: range_params})
def inner(values):
    """Return a ``Range`` query for a single ``start--end`` expression.

    ``values`` must hold exactly one string containing exactly one ``--``
    and at least one non-empty end. A start prefixed with ``>`` or an end
    prefixed with ``<`` is exclusive; otherwise the bound is inclusive.
    When the enclosing ``start_date_math`` / ``end_date_math`` is set it is
    appended to the matching end as ``value||math``. Extra ``kwargs`` from
    the enclosing scope are included in the range parameters.

    Raises:
        RESTValidationError: if the expression is malformed.
    """
    well_formed = (
        len(values) == 1
        and values[0].count('--') == 1
        and values[0] != '--'
    )
    if not well_formed:
        raise RESTValidationError(
            errors=[FieldError(field, 'Invalid range format.')])

    # (strictness marker, exclusive operator, inclusive operator, date
    # math); first entry is for the start of the range, second for its end.
    bound_specs = (
        ('>', 'gt', 'gte', start_date_math),
        ('<', 'lt', 'lte', end_date_math),
    )

    bounds = {}
    endpoints = values[0].split('--')
    for endpoint, spec in zip(endpoints, bound_specs):
        marker, exclusive, inclusive, date_math = spec
        if endpoint == '':
            continue
        if endpoint[0] == marker:
            operator = exclusive
            endpoint = endpoint[1:]
        else:
            operator = inclusive
        if date_math:
            endpoint = '{0}||{1}'.format(endpoint, date_math)
        bounds[operator] = endpoint

    # Parsed bounds take precedence over same-named extra parameters.
    args = kwargs.copy()
    args.update(bounds)
    return Range(**{field: args})
def test_date_range_filter(app):
    """Test date range filter date validation and query.

    Each sample input must either produce the matching ``Range`` query from
    both filters, or be rejected by both with ``ValueError``.
    """
    # The filters do not depend on the input, so build them once.
    from_filter = date_range_filter("field", "gte")
    to_filter = date_range_filter("field", "lte")

    tests = ["", "a string", "2020-02-02"]
    for input_date in tests:
        try:
            assert from_filter([input_date]) == Range(
                field={"gte": input_date})
            assert to_filter([input_date]) == Range(
                field={"lte": input_date})
        except (ValueError, AssertionError):
            # Each filter gets its own context: inside a shared one the
            # second call would never run once the first had raised.
            with pytest.raises(ValueError):
                from_filter([input_date])
            with pytest.raises(ValueError):
                to_filter([input_date])
def test_range_filter():
    """Check range parsing, strict bounds, date math and validation."""
    f = range_filter(
        'test', start_date_math='startmath', end_date_math='endmath')

    # (range expression, expected Range parameters)
    accepted = (
        ('1821--1940', {'gte': '1821||startmath', 'lte': '1940||endmath'}),
        ('>1821--', {'gt': '1821||startmath'}),
        ('1821--<1940', {'gte': '1821||startmath', 'lt': '1940||endmath'}),
    )
    for expression, expected_params in accepted:
        assert f([expression]) == Range(test=expected_params)

    # A value without separator, or with nothing but the separator, is
    # rejected.
    for malformed in ('2016', '--'):
        with pytest.raises(RESTValidationError):
            f([malformed])
def test_create_filter_range(self):
    """`walt` terms give an elevation Range with whichever bounds are usable."""
    rejected = (
        ('not a valid field', '1500,2500'),
        ('walt', ''),
        ('walt', 'not a, number'),
    )
    for name, term in rejected:
        self.assertEqual(create_filter(name, term, SearchWaypoint), None)

    # (query term, expected bounds on `elevation`)
    accepted = (
        ('1500,2500', {'gte': 1500, 'lte': 2500}),
        ('1500.5,2500.99', {'gte': 1500.5, 'lte': 2500.99}),
        ('1500,', {'gte': 1500}),
        ('1500', {'gte': 1500}),
        (',2500', {'lte': 2500}),
        ('NaN,2500', {'lte': 2500}),
        ('1500,NaN', {'gte': 1500}),
    )
    for term, bounds in accepted:
        self.assertEqual(
            create_filter('walt', term, SearchWaypoint),
            Range(elevation=bounds))
def main():
    """Delete documents older than a configured number of years.

    Reads ``config-cleaner.yml``; from its ``settings`` section it uses
    ``log-level``, ``thread_count``, ``cluster`` (comma-separated hosts),
    ``index``, ``field`` (the date field to compare) and ``years``.

    Every document of ``index`` whose ``field`` is older than
    ``now-<years>y`` is deleted through ``parallel_bulk``; failures are
    logged and counted but do not stop the run.

    Raises:
        SystemExit: with status 1 when the configuration is not valid YAML.
    """
    config_file = "config-cleaner.yml"
    logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s',
                        datefmt='%Y-%m-%d, %H:%M:%S')

    with open(config_file, 'r') as stream:
        try:
            config = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            logging.error(f"Cannot load file: {config_file} - Error: {exc}")
            # Report the failure through a non-zero exit status.
            raise SystemExit(1)

    settings = config['settings']
    logging.getLogger().setLevel(settings['log-level'])
    thread_count = settings['thread_count']
    cluster = settings['cluster']
    index = settings['index']
    field = settings['field']
    years = settings['years']
    logging.info('Loaded settings started')

    logging.getLogger('elasticsearch').setLevel(logging.WARN)
    logging.info(f"connecting to cluster {cluster} index {index}")
    client = Elasticsearch(cluster.split(","))

    s = Search(using=client, index=index)
    total = s.count()

    old_documents = s.filter(
        # adapt to months if needed
        # https://elasticsearch-dsl.readthedocs.io/en/2.2.0/search_dsl.html#queries
        Range(**{field: {"lt": f"now-{years}y"}}))
    matches = old_documents.count()
    if matches < 1:
        logging.warning(
            f"no documents older than {years} year(s) found ({total} total)")
        return

    items_deleted = 0
    items_failed = 0
    logging.info(
        f"{matches} of {total} documents older than {years} year(s), deleting..."
    )
    # Errors are reported per item instead of raised, so that one failure
    # does not abort the whole clean-up.
    for success, info in parallel_bulk(client,
                                       delete_actions(old_documents.scan()),
                                       thread_count=thread_count,
                                       raise_on_exception=False,
                                       raise_on_error=False):
        if not success:
            logging.warning(f"failed: {info}")
            items_failed += 1
        else:
            items_deleted += 1
            if items_deleted % 10000 == 0:
                logging.info(f"deleted documents: {items_deleted}")

    logging.info(
        f"deleted: {items_deleted} failed: {items_failed} documents from index: {index}"
    )
def search_author_messages(self, author):
    """Print the messages an author posted during the last day.

    Searches ``MessageDoc`` for documents whose ``author.id`` matches
    ``author.id`` and whose ``timestamp`` lies between ``now-1d``
    (included) and ``now`` (excluded), then prints each one as a dict
    below a separator line.
    """
    time_gte = 'now-1d'
    last_day = Range(timestamp={'gte': time_gte, 'lt': 'now'})

    search = MessageDoc.search()
    search = search.filter('match', **{'author.id': author.id})
    search = search.query(last_day)

    separator = '-' * 40
    for message in search.scan():
        print(separator)
        print(message.to_dict())
def inner(values):
    """Return a ``Range`` merging the bounds of every selected key.

    For each key of the enclosing ``range_query`` that appears in
    ``values``, its bounds are merged into one dict (keys seen later in
    ``range_query`` overwrite earlier ones). The enclosing ``kwargs`` are
    applied last.
    """
    bounds = {}
    for range_key, mappings in range_query.items():
        if range_key not in values:
            continue
        bounds.update(mappings)
    bounds.update(kwargs)
    return Range(**{field: bounds})
def inner(values):
    """Return the ``opening_date`` query for the first requested value.

    * ``"upcoming"``: ``opening_date`` is today (UTC, ``YYYY-MM-DD``) or
      later;
    * ``"all"``: an empty ``Q()``;
    * anything else is delegated to ``date_range_filter``.
    """
    value = values[0] if values else values

    if value == "all":
        return Q()
    if value == "upcoming":
        today = datetime.utcnow().strftime("%Y-%m-%d")
        return Range(opening_date={"gte": today})
    return date_range_filter(values)
def inner(values):
    """Return the ``start_datetime`` query for the first requested value.

    * ``"upcoming"``: ``start_datetime`` is the current UTC time (naive
      ISO 8601 string) or later;
    * ``"all"``: an empty ``Q()``;
    * anything else is delegated to ``date_range_filter``.
    """
    value = values[0] if values else values

    if value == "all":
        return Q()
    if value == "upcoming":
        now = datetime.utcnow().isoformat()
        return Range(start_datetime={"gte": now})
    return date_range_filter(values)