Beispiel #1
0
def get_subscribers(targetings, hours_whitelist, volume):
    """Fetch up to ``volume`` randomly scored subscribers from the "users" index.

    Two conditions are always added on top of the caller's targetings:
    documents whose ``unsub`` field is ``1`` or ``"true"`` are excluded, and,
    when at least one timezone currently has a local hour contained in
    ``hours_whitelist``, only documents whose ``timezone`` is one of those
    zones are kept.

    :param targetings: list of condition dicts, each with the keys
        ``"field"``, ``"operator"`` (``"IN"`` or ``"NOT IN"``) and
        ``"values"``. The list is not modified.
    :param hours_whitelist: container of hours; a timezone is selected when
        its current local hour is in this container.
    :param volume: upper bound of the slice applied to the search.
    :return: list of dicts (each hit's ``to_dict()`` plus its ``_id``), or
        ``None`` when the search raises ``ElasticsearchException``.
    :raises KeyError: if a condition's operator is neither ``"IN"`` nor
        ``"NOT IN"``.
    """
    logger.debug("get_subscribers: getting subscribers")
    start_time = time.time()

    # Take a single reference instant so every timezone is evaluated against
    # the same moment instead of calling now() once per zone.
    now_utc = datetime.now(pytz.utc)
    timezones = [tz
                 for tz in pytz.all_timezones
                 if now_utc.astimezone(pytz.timezone(tz)).hour
                 in hours_whitelist]

    # Work on a copy: appending to the caller's list would leak the extra
    # conditions into subsequent calls that reuse the same list.
    conditions = list(targetings)
    conditions.append({
        "field": "unsub",
        "operator": "NOT IN",
        "values": [1, "true"]
    })
    if timezones:
        conditions.append({
            "field": "timezone",
            "operator": "IN",
            "values": timezones
        })

    es_search = Search(using=es, index="users")
    operator_mappings = {
        'IN': 'must',
        'NOT IN': 'must_not',
    }

    # Combine one bool query per condition into a single query.
    es_query = Q()
    for condition in conditions:
        condition_pair = {condition["field"]: condition["values"]}
        terms_q = Q('terms', **condition_pair)
        bool_operator = operator_mappings[condition['operator']]
        bool_q = Q('bool', **{bool_operator: terms_q})
        es_query += bool_q
    es_search = es_search.query(es_query)

    # Replace the relevance score with a random one so that the slice below
    # picks a random sample of the matching documents.
    es_search.query = dslq.FunctionScore(
        query=es_search.query,
        functions=[dslq.SF('random_score')],
        boost_mode="replace"
        )
    es_search = es_search[:volume]

    try:
        res = es_search.execute()
    except ElasticsearchException as e:
        logger.error(f"get_subscribers: Exception {e}")
        # Best-effort contract kept from the original: failure yields None.
        return None
    else:
        subscribers = []
        for row in res.hits:
            subscriber = row.to_dict()
            subscriber['_id'] = row.meta.id
            subscribers.append(subscriber)
        end_time = time.time()
        logger.debug(f"get_subscribers: finished in "
                     f"{int((end_time - start_time) * 1000)}ms")
        return subscribers
Beispiel #2
0
def function_score_content_query(query: Q.Query, content_types: List[AvailableContentTypes], boost: float=1.0) -> Q.Query:
    """
    Wrap a query in a function score query with one function per content type.

    :param query: the query to wrap
    :param content_types: content types; each contributes the result of its
        ``value.filter_function()`` call, in the given order
    :param boost: boost forwarded to the function score query
    :return: a ``Q.FunctionScore`` built around ``query``
    """
    scoring_functions = [
        available_type.value.filter_function()
        for available_type in content_types
    ]
    return Q.FunctionScore(query=query, functions=scoring_functions, boost=boost)
def custom_search_model(model,
                        query,
                        preview=False,
                        published=False,
                        id_field="id",
                        sort_pinned=True,
                        field_map=None):
    """Filter a model with the given filter.

    `field_map` translates incoming field names to the appropriate ES names.

    :param model: object exposing ``search_objects.search(...)``.
    :param query: mapping describing the search; the keys ``"query"`` (text
        query) and ``"pinned_ids"`` are read here, and the whole mapping is
        passed to the filter builder.
    :param preview: when true, ``preview_filter_from_query`` builds the
        filter instead of ``filter_from_query``.
    :param published: when true, results are additionally restricted to
        documents whose ``published`` value is not later than now.
    :param id_field: forwarded to the filter builder.
    :param sort_pinned: when true and ``pinned_ids`` is non-empty, pinned
        documents get their score weighted and results sort by score first.
    :param field_map: forwarded to the filter builder; defaults to an empty
        dict created per call.
    :return: the resulting search queryset.
    """
    # A fresh dict per call avoids sharing one mutable default object between
    # calls (and with whatever the filter builders do with it).
    if field_map is None:
        field_map = {}

    build_filter = preview_filter_from_query if preview else filter_from_query
    f = build_filter(query, id_field=id_field, field_map=field_map)

    # filter by published
    if published:
        published_range = Range(published={"lte": timezone.now()})
        if f:
            f &= published_range
        else:
            f = published_range

    qs = model.search_objects.search(published=False)
    if f:
        qs = qs.filter(f)

    # possibly include a text query
    if query.get("query"):
        qs = qs.query("match", _all=query["query"])

    # set up pinned ids
    pinned_ids = query.get("pinned_ids")
    if pinned_ids and sort_pinned:
        # Multiply the score of pinned documents so they sort first.
        pinned_query = es_query.FunctionScore(
            boost_mode="multiply",
            functions=[{
                "filter": Terms(id=pinned_ids),
                "weight": 2,
            }])

        qs = qs.query(pinned_query)
        qs = qs.sort("_score", "-published")
    else:
        qs = qs.sort("-published")
    return qs
Beispiel #4
0
def content_query(search_term, function_scores=None):
    """
    Build the default ONS content query for a search term.

    The result is a dis_max query over title, summary/description, keyword,
    identifier and search-boost sub-queries. When ``function_scores`` is
    supplied, the dis_max query is wrapped in a function score query.

    :param search_term: the text to search for
    :param function_scores: optional functions for a function score wrapper
    :return: the dis_max query, or a function score query wrapping it
    """
    title_threshold = "1<-2 3<80% 5<60%"

    # Title matching: any of the three variants may contribute.
    title_no_dates_match = match(fields.title_no_dates,
                                 search_term,
                                 type="boolean",
                                 boost=10.0,
                                 minimum_should_match=title_threshold)
    title_no_stem_match = match(fields.title_no_stem,
                                search_term,
                                type="boolean",
                                boost=10.0,
                                minimum_should_match=title_threshold)
    title_edition_match = multi_match(
        [fields.title.field_name_boosted, fields.edition.field_name_boosted],
        search_term,
        type="cross_fields",
        minimum_should_match="3<80% 5<60%")
    title_clause = query.Bool(should=[
        title_no_dates_match,
        title_no_stem_match,
        title_edition_match,
    ])

    description_clause = multi_match(
        [fields.summary.name, fields.metaDescription.name],
        search_term,
        type="best_fields",
        minimum_should_match="75%")
    keywords_clause = match(fields.keywords,
                            search_term,
                            type="boolean",
                            operator="AND")
    identifier_clause = multi_match(
        [fields.cdid.name, fields.datasetId.name],
        search_term)
    search_boost_clause = match(fields.searchBoost,
                                search_term,
                                type="boolean",
                                operator="AND",
                                boost=100.0)

    dis_max = query.DisMax(queries=[
        title_clause,
        description_clause,
        keywords_clause,
        identifier_clause,
        search_boost_clause,
    ])

    if function_scores is not None:
        return query.FunctionScore(query=dis_max, functions=function_scores)
    return dis_max
Beispiel #5
0
def randomize_es(es_queryset):
    """Return the queryset scored by a random-score function, best score first."""
    random_scoring = query.FunctionScore(functions=[function.RandomScore()])
    randomized = es_queryset.query(random_scoring)
    return randomized.sort("-_score")