Ejemplo n.º 1
0
def adjust_search_for_percolator(search):
    """
    Returns an updated Search which can be used with percolator.

    Percolated queries can only store the query portion of the search object
    (see https://github.com/elastic/elasticsearch/issues/19680). This will modify the original search query
    to add post_filter arguments to the query part of the search. Then all parts of the Search other than
    query will be removed.

    Args:
        search (Search): A search object

    Returns:
        Search: updated search object
    """
    # Fold any post_filter into the query itself, since the percolator can
    # only persist the query section of the body.
    original_dict = search.to_dict()
    if 'post_filter' in original_dict:
        search = search.filter(original_dict['post_filter'])

    # Keep only the query portion; everything else is dropped.
    query_only = {}
    folded_dict = search.to_dict()
    if 'query' in folded_dict:
        query_only['query'] = folded_dict['query']
    percolatable = Search(index=search._index)  # pylint: disable=protected-access
    percolatable.update_from_dict(query_only)
    return percolatable
Ejemplo n.º 2
0
    def _search_iterator(self,
                         q,
                         start,
                         max,
                         count,
                         sort,
                         level='Package',
                         include=False):
        """
        Yield search hits for query body ``q`` from the index mapped to
        ``level``.

        Yields hit ids, or ``(id, description)`` tuples when ``include`` is
        truthy.  Uses windowed pagination up to the 10k ES limit and falls
        back to scan() for full-set retrieval.

        Raises:
            Exception: if ``max`` would paginate past 10,000 hits.
        """
        if max and start + max > 10_000:
            raise Exception(
                'Pagination beyond 10000 hits not allowed, use empty max parameter to retrieve full set'
            )

        index = self.index_map.get(level, self.index_name)

        s = Search(using=self.elastic, index=index)
        # Bug fix: Search.extra() and Search.source() return a modified
        # *copy*; the original discarded those copies, so track_total_hits
        # and source filtering silently never took effect.
        s = s.extra(track_total_hits=True)
        # update_from_dict mutates the Search in place, no reassignment needed.
        s.update_from_dict(q)
        s = s.source(include)

        m = max or count

        for hit in s[start:start + m] if start + m <= 10_000 else s.scan():
            yield hit.meta.id if not include else (hit.meta.id,
                                                   self._hit_to_desc(hit))
Ejemplo n.º 3
0
def adjust_search_for_percolator(search):
    """
    Returns an updated Search which can be used with percolator.

    Percolated queries can only store the query portion of the search object
    (see https://github.com/elastic/elasticsearch/issues/19680). This will modify the original search query
    to add post_filter arguments to the query part of the search. Then all parts of the Search other than
    query will be removed.

    Args:
        search (Search): A search object

    Returns:
        Search: updated search object
    """
    search_dict = search.to_dict()
    if 'post_filter' in search_dict:
        # Fold the post_filter into the query itself; filter() returns a new
        # Search with the filter merged into the query section.
        search = search.filter(search_dict['post_filter'])

    # Remove all other keys besides query
    updated_search_dict = {}
    # Re-serialize: the dict now reflects any folded-in post_filter.
    search_dict = search.to_dict()
    if 'query' in search_dict:
        updated_search_dict['query'] = search_dict['query']
    updated_search = Search(index=search._index)  # pylint: disable=protected-access
    # update_from_dict mutates the Search in place.
    updated_search.update_from_dict(updated_search_dict)
    return updated_search
Ejemplo n.º 4
0
 def retrieve_library_doc(self,
                          keywords,
                          size=10,
                          search_fields=None):
     """
     Search the library index with a simple query string over boosted
     fields and return matching documents serialized to JSON.

     Args:
         keywords: a query string, or a list of query terms joined by spaces.
         size: NOTE(review) — accepted but not used by the query; confirm
             whether it should be passed as the ES "size".
         search_fields: boosted fields to search; defaults to
             ["title^3", "tags^3", "paragraphs"].

     Returns:
         list: JSON-serialized CMKBDLibraryDocument matches.
     """
     # Bug fix: a mutable default argument is shared across calls; use None
     # as the sentinel and build the default list per call.
     if search_fields is None:
         search_fields = ["title^3", "tags^3", "paragraphs"]
     # isinstance() instead of type() == str also covers str subclasses.
     if isinstance(keywords, str):
         keywords = [keywords]
     query = " ".join(keywords)
     s = Search(index=self.index).using(self.es)
     s.update_from_dict({
         "query": {
             "simple_query_string": {
                 "fields": search_fields,
                 "query": query
             }
         }
     })
     res = s.execute()
     docs = []
     for d in res.hits:
         docs.append(
             CMKBDLibraryDocument(url=d.url,
                                  title=d.title,
                                  body=d.body,
                                  tags=d.tags,
                                  paragraphs=list(d.paragraphs)).to_json())
     return docs
Ejemplo n.º 5
0
 def get_result_history(self, prefix, start, limit):
     """
     ES handle to implement the get_result_history functionality of RunResult
     """
     # Exclude the special primary/latest documents from history results.
     excluded_ids = [
         {'term': {'_id': prefix + '/primary.json'}},
         {'term': {'_id': prefix + '/latest.json'}},
     ]
     doc = {
         'from': start,
         'size': limit,
         'sort': {'uuid': {'order': 'desc'}},
         'query': {
             'bool': {
                 'must_not': excluded_ids,
                 'filter': {
                     # use MATCH so our 'prefix' is analyzed like the source field 'name', see mapping
                     'match': {'name': prefix}
                 }
             }
         }
     }
     history_search = Search(using=self.es, index=self.index)
     history_search.update_from_dict(doc)
     return self.search(history_search)
Ejemplo n.º 6
0
 def query(self,
           q,
           kind='term',
           field='email.raw',
           offset=0,
           limit=20,
           indices=[INDICES_ALL]):
     """
     Run a search against ``indices`` and return a result summary dict.

     Args:
         q: either a raw ES body dict (applied verbatim), or a query value.
         kind: key into QUERY_KINDS selecting the query class; the special
             value 'default' builds a boosted email/username query instead.
         field: target field for the QUERY_KINDS branch.
         offset: first result index of the returned window.
         limit: window size.
         indices: indices to search.  NOTE(review): mutable default
             argument — benign here since the list is never mutated.

     Returns:
         dict with results, total, hits_relation, took and timed_out.
     """
     search = Search(using=self.es, index=indices)
     if isinstance(q, dict):
         # Raw body: update_from_dict mutates the Search in place.
         search.update_from_dict(q)
     else:
         if kind == 'default':
             # Exact email (boost 3) or exact username (boost 2), OR an
             # analyzed match on username.
             search.query = (Term(**{'email.raw': {
                 'value': q,
                 'boost': 3
             }}) | Term(**{'username.raw': {
                 'value': q,
                 'boost': 2
             }}) | Match(username={'query': q}))
         else:
             cls_ = QUERY_KINDS[kind]
             search = search.query(cls_(**{field: q}))
     # Slice applies from/size, then execute.
     results = search[offset:offset + limit].execute()
     return dict(results=results,
                 total=results.hits.total.value,
                 hits_relation=results.hits.total.relation,
                 took=results.took,
                 timed_out=results.timed_out)
Ejemplo n.º 7
0
def query_names_with_exact_matches(index, field, search_phrase, max_return=30):
    """
    Return up to ``max_return`` documents whose ``field`` exactly matches
    every token of the normalized search phrase, sorted by CitationCount.
    """
    # to reduce the search time, we only allow exact string match
    normalized_search_phrase = re.sub(r'\W+', ' ', search_phrase).lower()

    q = {
        "size": max_return,
        "sort": [{"CitationCount": "desc"}],
        "query": {
            "match": {
                field: {
                    "query": normalized_search_phrase,
                    "operator": "and"
                }
            }
        }
    }
    s = Search(using=client, index=index).params(preserve_order=True)
    s.update_from_dict(q)

    matches = []
    for hit in s.scan():
        # scan() ignores "size", so cap the result count manually.
        if len(matches) >= max_return:
            break
        matches.append(hit.to_dict())
    return matches
Ejemplo n.º 8
0
 def list_all_keys_w_prefix(self, prefix):
     """
     Lists all id's in this ES that have the given prefix.
     Only gets ES_SEARCH_SIZE number of results, most recent first.
     """
     body = {
         'size': self.ES_SEARCH_SIZE,
         'query': {'bool': {'filter': {'term': {'name': prefix}}}},
         'sort': {'uuid': {'order': 'desc'}},
     }
     prefix_search = Search(using=self.es, index=self.index)
     prefix_search.update_from_dict(body)
     # key='_id' makes self.search return only document ids.
     return self.search(prefix_search, key='_id')
Ejemplo n.º 9
0
 def _prepare_search(cls, query, fields=None):
     """
     Build the base Search (ids only) and the list of field names to query.

     Returns:
         tuple: (Search restricted to cls._es_doctype, list of fieldnames).
     """
     # NOTE(review): ``query`` is unused here — presumably applied by the
     # caller; confirm.
     # NOTE(review): ``collections.Iterable`` and ``basestring`` are
     # Python 2 idioms (collections.abc.Iterable / str on Python 3).
     if fields and not (isinstance(fields, collections.Iterable) and
             not isinstance(fields, basestring)):
         fields = [fields]
     fieldnames = ([_get_fieldname(f) for f in fields] if fields else
                  cls._es_string_fieldnames_with_boost)
     s = ES_Search().doc_type(cls._es_doctype)
     # Get only ids to avoid json decode issues
     s.update_from_dict({'fields': 'id'})
     return s, fieldnames
Ejemplo n.º 10
0
def search_graphs1(request,
                   owner_email=None,
                   names=None,
                   nodes=None,
                   edges=None,
                   tags=None,
                   member_email=None,
                   is_public=None,
                   query=None,
                   limit=20,
                   offset=0,
                   order='desc',
                   sort='name'):
    """
    Search graphs matching the given filters, optionally restricting to ids
    returned by an Elasticsearch query body.

    Returns:
        tuple: (total, graphs_list) from db.find_graphs.

    Raises:
        Exception: if member_email does not correspond to a user.
    """
    sort_attr = getattr(db.Graph, sort if sort is not None else 'name')
    order_by = getattr(db, order if order is not None else 'desc')(sort_attr)
    is_public = int(is_public) if is_public is not None else None

    if member_email is not None:
        member_user = users.controllers.get_user(request, member_email)
        if member_user is None:
            raise Exception("User with given member_email doesnt exist.")
        group_ids = [
            group.id
            for group in users.controllers.get_groups_by_member_id(
                request, member_user.id)
        ]
    else:
        group_ids = None

    if edges is not None:
        edges = [tuple(edge.split(':')) for edge in edges]

    # Bug fix: query defaults to None and `'query' in None` raises
    # TypeError; guard against a missing/empty query dict.
    if query and 'query' in query:
        s = Search(using=settings.ELASTIC_CLIENT, index='graphs')
        s.update_from_dict(query)  # mutates the Search in place
        # Bug fix: Search.source() returns a modified copy; the original
        # discarded it, so the full _source was still being fetched.
        s = s.source(False)
        graph_ids = [int(hit.meta.id) for hit in s.scan()]
    else:
        graph_ids = None

    total, graphs_list = db.find_graphs(request.db_session,
                                        owner_email=owner_email,
                                        graph_ids=graph_ids,
                                        is_public=is_public,
                                        group_ids=group_ids,
                                        names=names,
                                        nodes=nodes,
                                        edges=edges,
                                        tags=tags,
                                        limit=limit,
                                        offset=offset,
                                        order_by=order_by)

    return total, graphs_list
Ejemplo n.º 11
0
def get_documents(index):
    """Return every document in ``index`` as a list of dicts."""
    match_all = {"query": {"match_all": {}}}
    s = Search(using=client, index=index)
    s.update_from_dict(match_all)
    # scan() pages through the full result set.
    return [hit.to_dict() for hit in s.scan()]
Ejemplo n.º 12
0
 def delete_all_docs(self):
     """Bulk-delete up to 1000 documents matched by match_all."""
     s = Search(index=self.index).using(self.es)
     s.update_from_dict({"query": {"match_all": {}}, "size": 1000})
     hits = s.execute().hits
     actions = [{
         '_op_type': 'delete',
         "_index": self.index,
         "_id": hit.meta.id,
         '_type': self.doc_type
     } for hit in hits]
     status, _ = bulk(self.es, actions)
     print("delete")
     print(status)
Ejemplo n.º 13
0
 def get_search(self, search: dict = None):
     """Get Search object from ElasticSearch"""
     # Guard clause: no identity means no per-client index to build.
     if not current_identity:
         return None
     try:
         index = "report-{}".format(
             current_identity.get_client().token).lower()
         s = Search(using=self.es, index=index)
         if search:
             # update_from_dict mutates the Search in place.
             s.update_from_dict(search)
         return s
     except ElasticsearchException:
         logging.exception("Elasticsearch exception")
         return None
Ejemplo n.º 14
0
def get_ratings_by_viewer(viewer_id, limit=100):
    """
    Fetch ratings for a viewer via a nested query, sorted by date rated.

    Uses the `update_from_dict` technique—useful for cases where you'd
    rather not navigate the function calls due to the complexity of the
    query object.
    """
    # The nested sort must repeat the same viewer match used in the query —
    # see /reference/current/sort-search-results.html#nested-sorting
    # in https://www.elastic.co/guide/en/elasticsearch
    viewer_match = {'match': {'ratings.viewer_id': viewer_id}}
    body = {
        '_source': {'excludes': ['ratings']},
        'size': 100,
        'query': {
            'nested': {
                'path': 'ratings',
                'inner_hits': {},
                'query': viewer_match,
            }
        },
        'sort': [
            {
                # Sorts based on a nested object can be…involved.
                'ratings.date_rated': {
                    'order': 'asc',
                    'nested': {
                        'path': 'ratings',
                        'filter': viewer_match,
                    },
                }
            },
            'title.keyword',
        ],
    }

    ratings_search = Search(using=es, index=MOVIES_INDEX)
    ratings_search.update_from_dict(body)  # mutates in place

    # Restructure the raw results into something simpler for the caller.
    response = ratings_search.execute()
    return [rating_from_ratings_by_viewer_hit(hit) for hit in response]
Ejemplo n.º 15
0
def es_search_author_name(author_name):
    """
    Search the authors index for ``author_name`` and return the matching
    author record with the highest PaperCount, or [] when nothing matches.
    """
    body = {
        "size": 10,
        "from": 0,
        "query": {
            "function_score": {
                "query": {
                    "bool": {
                        "should": {"match": {"DisplayName": author_name}}
                    }
                },
                # Re-weight relevance by (score^3) * log(citations + 10).
                "functions": [
                    {"script_score": {
                        "script": "Math.pow(_score, 3) * (Math.log((doc['CitationCount'].value + 10)))"
                    }}
                ]
            }
        }
    }
    author_search = Search(using=client, index="authors").update_from_dict(body)
    hits = author_search.execute().to_dict()["hits"]["hits"]
    if not hits:
        print("[es_search_author_name] no result", author_name)
        return []
    records = [hit["_source"] for hit in hits]
    records.sort(key=itemgetter("PaperCount"), reverse=True)
    return records[0]
Ejemplo n.º 16
0
def execute_learn_search(*, user, query):
    """
    Execute a learning resources search based on the query

    Args:
        user (User): The user executing the search. Used to determine filters to enforce permissions.
        query (dict): The Elasticsearch query constructed in the frontend

    Returns:
        dict: The Elasticsearch response dict
    """
    alias = get_default_alias_name(ALIAS_ALL_INDICES)
    search = Search(index=alias)
    # update_from_dict mutates the Search in place.
    search.update_from_dict(query)
    search = _apply_learning_query_filters(search, user)
    raw_results = search.execute().to_dict()
    return transform_results(raw_results, user)
Ejemplo n.º 17
0
def get_bucket_size(client, index, agg_field):
    """Function to help us define the size for our search query."""
    # Cardinality aggregation over the distinct values of ``agg_field``.
    body = {
        "aggs": {
            "size_count": {
                "cardinality": {
                    "field": agg_field
                }
            }
        }
    }
    search = Search(using=client, index=index)
    # update_from_dict mutates the Search in place; no reassignment needed.
    search.update_from_dict(body)
    # NOTE(review): Search.count() issues a _count request, which ignores
    # aggregations — so the cardinality agg above appears to have no effect
    # and this returns the total document count rather than the distinct
    # value count. Confirm whether execute() +
    # response.aggregations.size_count.value was intended instead.
    count = search.count()
    # NOTE: we increase the count by 10% in order to be safe
    return int(ceil(count + count * 0.1))
Ejemplo n.º 18
0
def get_authors_from_paper(paper_id):
    """
    Return up to three AuthorIds for ``paper_id``, ordered by author
    sequence number.
    """
    body = {
        "_source": "AuthorId",
        "size": 3,
        "sort": [{"AuthorSequenceNumber": "asc"}],
        "query": {"term": {"PaperId": paper_id}},
    }
    author_search = Search(using=client, index="paperauthoraffiliations")
    author_search.update_from_dict(body)
    hits = author_search.execute().to_dict()["hits"]["hits"]
    return [hit["_source"]["AuthorId"] for hit in hits]
Ejemplo n.º 19
0
    def get_brands(
            self,
            keyword: str = None,
            prefix: List[str] = None, exclude: List[str] = None,
            page=1, size=1000, **kwargs) -> List[dict]:
        """
        Return manufacturer names aggregated over in-stock items whose
        brand_code starts with one of ``prefix`` and is not in ``exclude``.

        NOTE(review): ``keyword``, ``page``/``size`` (via the unused
        ``offset``) and ``**kwargs`` do not affect the query — confirm
        whether they were meant to.
        """
        # Bug fix: mutable default arguments ([]) are shared across calls;
        # use None as the sentinel and normalize per call instead.
        prefix = [item.lower().strip() for item in (prefix or [])]
        exclude = [item.lower().strip() for item in (exclude or [])]
        offset = (page - 1) * size
        s = Search(using=self.elasticsearch, index=self.INDEX_NAME)
        s = s.update_from_dict({
            "query": {
                "bool": {
                    "must_not": [
                        {
                            "terms": {
                                "brand_code": [item.lower() for item in exclude if item]
                            }
                        }
                    ],
                    "must": [
                        {
                            "range": {
                                "sizes.qty": {
                                    "gte": 0
                                }
                            }
                        },
                        {
                            "bool": {
                                "should": [
                                    {
                                        "prefix": {
                                            "brand_code": item.lower()
                                        }
                                    } for item in prefix if item
                                ]
                            }
                        }
                    ]
                }
            },
            "aggs": {
                "brands": {
                    "terms": {
                        "field": "manufacturer",
                        "order": {
                            "_term": "asc"
                        },
                        "size": 1000
                    }
                }
            },
            "size": 10
        })

        response = s.execute()
        buckets = response.aggregations.brands.buckets
        return [item['key'] for item in buckets]
Ejemplo n.º 20
0
def create_search_obj(user,
                      search_param_dict=None,
                      filter_on_email_optin=False):
    """
    Creates a search object and prepares it with metadata and query parameters that
    we want to apply for all ES requests

    Args:
        user (User): User object
        search_param_dict (dict): A dict representing the body of an ES query
        filter_on_email_optin (bool): If true, filter out profiles where email_optin != True

    Returns:
        Search: elasticsearch_dsl Search object
    """
    staff_program_ids = get_advance_searchable_program_ids(user)
    is_advance_search_capable = bool(staff_program_ids)
    if is_advance_search_capable:
        index_type = PRIVATE_ENROLLMENT_INDEX_TYPE
    else:
        index_type = PUBLIC_ENROLLMENT_INDEX_TYPE
    search_obj = Search(index=get_default_alias(index_type))

    # Update from search params first so our server-side filtering will overwrite it if necessary
    if search_param_dict is not None:
        search_obj.update_from_dict(search_param_dict)

    if not is_advance_search_capable:
        # Learners can't search for other learners with privacy set to private
        privacy_filter = ~Q('term', **{'profile.account_privacy': Profile.PRIVATE})  # pylint: disable=invalid-unary-operand-type
        search_obj = search_obj.filter(privacy_filter)

    # Limit results to one of the programs the user is staff on
    program_query = create_program_limit_query(
        user,
        staff_program_ids,
        filter_on_email_optin=filter_on_email_optin)
    search_obj = search_obj.filter(program_query)
    # Filter so that only filled_out profiles are seen
    search_obj = search_obj.filter(Q('term', **{'profile.filled_out': True}))

    # Force size to be the one we set on the server
    overrides = {'size': settings.ELASTICSEARCH_DEFAULT_PAGE_SIZE}
    if search_param_dict is not None and search_param_dict.get('from') is not None:
        overrides['from'] = search_param_dict['from']
    search_obj.update_from_dict(overrides)

    return search_obj
Ejemplo n.º 21
0
 def get_main_page_checks(self, checks=None, primary=True):
     """
     Gets all checks for the main page. If primary is true then all checks will
     be primary, otherwise we use latest.
     Only gets ES_SEARCH_SIZE number of results, most recent first.
     """
     t = 'primary' if primary else 'latest'
     body = {
         'size': self.ES_SEARCH_SIZE,
         'query': {
             'bool': {
                 'must': {
                     'query_string': {
                         'query': 'id_alias:"*' + t + '.json"'
                     }
                 },
                 'filter': {'term': {'type': 'check'}}
             }
         },
         'sort': {'uuid': {'order': 'desc'}}
     }
     check_search = Search(using=self.es, index=self.index)
     check_search.update_from_dict(body)
     raw_result = self.search(check_search)
     if checks is not None:
         # figure out which checks we didn't find, add a placeholder check so
         # that check is still rendered on the UI
         raw_result = [res for res in raw_result if res['name'] in checks]
         found_checks = {res['name'] for res in raw_result}
         for check_name in checks:
             if check_name not in found_checks:
                 raw_result.append(
                     CheckSchema().create_placeholder_check(check_name))
     return raw_result
Ejemplo n.º 22
0
def all_field_values(
    elasticsearch_client: Elasticsearch,
    field: str,
    query: Dict[str, Any],
    index_pattern: str = RAW_IMAGES_INDEX_PATTERN
) -> Generator[str, None, None]:
    """Yield each distinct value of ``field`` among docs matching ``query``."""
    body = {
        "aggs": {"all_values": {"terms": {"field": field, "size": 100000}}},
        "query": query["query"],
    }
    s = Search(using=elasticsearch_client, index=index_pattern)
    s.update_from_dict(body)  # mutates the Search in place
    resp = s.execute()
    unique_values = 0
    for bucket in resp.aggregations.all_values.buckets:
        yield bucket.key
        unique_values += 1
    log.debug(f"{unique_values} unique values for {field}")
Ejemplo n.º 23
0
def create_search_obj(user, search_param_dict=None):
    """
    Creates a search object and prepares it with metadata and query parameters that
    we want to apply for all ES requests

    Args:
        user (User): User object
        search_param_dict (dict): A dict representing the body of an ES query

    Returns:
        Search: elasticsearch_dsl Search object
    """
    search_obj = Search(index=settings.ELASTICSEARCH_INDEX, doc_type=DOC_TYPES)
    if search_param_dict is not None:
        # update_from_dict mutates the Search in place.
        search_obj.update_from_dict(search_param_dict)
    return search_obj.query(create_program_limit_query(user))
Ejemplo n.º 24
0
def create_search_obj(user, search_param_dict=None, filter_on_email_optin=False):
    """
    Creates a search object and prepares it with metadata and query parameters that
    we want to apply for all ES requests

    Args:
        user (User): User object
        search_param_dict (dict): A dict representing the body of an ES query
        filter_on_email_optin (bool): If true, filter out profiles where email_optin != True

    Returns:
        Search: elasticsearch_dsl Search object
    """
    staff_program_ids = get_advance_searchable_program_ids(user)
    is_advance_search_capable = bool(staff_program_ids)
    # Staff users search the private enrollment index; everyone else the public one.
    index_type = PRIVATE_ENROLLMENT_INDEX_TYPE if is_advance_search_capable else PUBLIC_ENROLLMENT_INDEX_TYPE
    index = get_default_alias(index_type)
    search_obj = Search(index=index)
    # Update from search params first so our server-side filtering will overwrite it if necessary
    if search_param_dict is not None:
        # update_from_dict mutates search_obj in place; no reassignment needed.
        search_obj.update_from_dict(search_param_dict)

    if not is_advance_search_capable:
        # Learners can't search for other learners with privacy set to private
        search_obj = search_obj.filter(
            ~Q('term', **{'profile.account_privacy': Profile.PRIVATE})  # pylint: disable=invalid-unary-operand-type
        )

    # Limit results to one of the programs the user is staff on
    search_obj = search_obj.filter(create_program_limit_query(
        user,
        staff_program_ids,
        filter_on_email_optin=filter_on_email_optin
    ))
    # Filter so that only filled_out profiles are seen
    search_obj = search_obj.filter(
        Q('term', **{'profile.filled_out': True})
    )
    # Force size to be the one we set on the server
    update_dict = {'size': settings.ELASTICSEARCH_DEFAULT_PAGE_SIZE}
    if search_param_dict is not None and search_param_dict.get('from') is not None:
        update_dict['from'] = search_param_dict['from']
    # Applied last so the server-side size always wins over client input.
    search_obj.update_from_dict(update_dict)

    return search_obj
Ejemplo n.º 25
0
    def query(self, query_body):

        '''
        Method to run query against Job's ES index
        '''

        # init query
        search = Search(using=es_handle, index=self.es_index)

        # normalize a JSON string body to a dict, then apply it
        if isinstance(query_body, str):
            query_body = json.loads(query_body)
        if isinstance(query_body, dict):
            search = search.update_from_dict(query_body)

        # execute and return
        return search.execute()
Ejemplo n.º 26
0
def _fetch_latest_for_source(source):
    """Return the 20 most recently published 'item' docs for ``source``."""
    body = {
        "size": 20,
        "sort": [{"pubDate": {"unmapped_type": "date", "order": "desc"}}],
        "query": {"match": {"source": source}},
    }
    latest_search = Search(index='rss', doc_type='item')
    latest_search.update_from_dict(body)  # mutates in place
    return latest_search.execute()
Ejemplo n.º 27
0
 def list_all_keys(self):
     """
     Generic search on es that will return all ids of indexed items
     Only gets ES_SEARCH_SIZE number of results, most recent first.
     """
     body = {
         'size': self.ES_SEARCH_SIZE,
         'query': {'match_all': {}},
         'sort': {'uuid': {'order': 'desc'}},
     }
     key_search = Search(using=self.es, index=self.index)
     key_search.update_from_dict(body)
     # key='_id' makes self.search return only document ids.
     return self.search(key_search, key='_id')
Ejemplo n.º 28
0
 def get_all_objects(self):
     """
     Generic match_all search that returns the full indexed objects
     (unlike list_all_keys, which returns only ids).
     Only gets ES_SEARCH_SIZE number of results, most recent first.
     """
     doc = {
         'size': self.ES_SEARCH_SIZE,
         'query': {
             'match_all': {}
         },
         # most recent first
         'sort': {
             'uuid': {
                 'order': 'desc'
             }
         }
     }
     search = Search(using=self.es, index=self.index)
     # update_from_dict mutates the Search in place.
     search.update_from_dict(doc)
     # No ``key`` argument: self.search returns whole documents.
     return self.search(search)
Ejemplo n.º 29
0
def search(request):
    """Render paginated full-text search results for the ``q`` GET param."""
    SIZE = 40  # results per page
    q = request.GET.get('q', '')
    _from = int(request.GET.get('from', 0))
    query = Search(index='rss', doc_type='item')
    query_body = {
        'size': SIZE,
        'from': _from,
        'query': {
            'query_string': {
                'fields': ['title', 'body'],
                'query': q
            },
            # NOTE(review): if re-enabled, "sort" belongs at the top level of
            # the body, not inside "query".
            # "sort": [
            #     {"pubDate": {"order": "desc"}},
            # ]
        }
    }
    # update_from_dict mutates the Search in place.
    query.update_from_dict(query_body)

    try:
        res = query.execute()
    except elasticsearch.RequestError as err:
        # Surface the ES root cause (e.g. bad query_string syntax) on the
        # error template.
        json_error = json.dumps(err.info['error']['root_cause'], indent=4)
        return render(request, 'rss/search_error.html', {
            'json_error': json_error,
            'q': q
        })

    _convert_dates(res.hits)
    # NOTE(review): assumes a response shape where 'total' is an int; on
    # ES 7+ it is a dict with 'value'/'relation' — confirm cluster version.
    total_hits = res['hits']['total']
    context = {
        'q': q,
        'hits': res.hits,
        'total_hits': total_hits,
        'has_prev': _from != 0,
        'has_next': (total_hits - _from - SIZE) > 0,
        'prev': _from - SIZE,
        'next': _from + SIZE,
        'page_num': (math.floor(_from / SIZE) + 1),
    }
    return render(request, 'rss/search.html', context)
Ejemplo n.º 30
0
 def get_queryset(self):
     """Run a phrase-prefix name search for the 'keyword' GET parameter."""
     try:
         keyword = self.request.GET['keyword']
     except MultiValueDictKeyError:
         # No keyword supplied — mirror the original silent fallthrough.
         return None
     recipe_search = Search(using=es, index='recipe')
     recipe_search.update_from_dict({
         'query': {
             'match': {
                 'name': {
                     'query': keyword,
                     'type': 'phrase_prefix',
                     'slop': 2
                 }
             },
         }
     })
     return recipe_search.extra(size=1000).execute()
Ejemplo n.º 31
0
    def find_posts(self, query, strategy="fuzzy", size=50):
        """
        Find all posts that match against the query searched.
        Fuzzy matching is turned on by default, but we can use exact
        string matching by simply the changing strategy arg to "match"

        :param size:
        :param query (str): what are we searching for
        :param strategy (str): matching strategy
        :return: posts (list): a list of posts (dicts) that are similar to the query, by content
        """
        search = Search(using=self.client, index=self.index)
        search.update_from_dict({"size": size})  # mutates in place
        response = search.doc_type(Post.DOC_TYPE).query(
            strategy, content=query).execute()
        posts = [hit.to_dict() for hit in response]
        # Highest score first.
        posts.sort(key=lambda post: post["score"], reverse=True)
        return posts
Ejemplo n.º 32
0
def get_field_description(es, field_name):
    """
    Return the 'description' stored for ``field_name`` in the fields index,
    or '' when the document has no description.

    Raises:
        ValueError: if no document with that id exists in the fields index.
    """
    fields_index = current_app.config['INDEX_NAME'] + '_fields'
    s = Search(using=es, index=fields_index)
    s.update_from_dict(
        {"query": {"bool": {"must": [{"match": {"_id": field_name}}]}}})
    hits = s.execute()['hits']['hits']
    if not hits:
        raise ValueError(
            'elasticsearch_field_name %s not found in Elasticsearch index %s' %
            (field_name, fields_index))
    return hits[0]['_source'].get('description', '')
Ejemplo n.º 33
0
def get_names_from_entity(entity_ids, index, id_field, name_field, with_id=False):
    """
    Map entity ids to their names via a terms query (first 100 hits).

    Returns the id->name dict when ``with_id`` is true, otherwise a list of
    names ordered like ``entity_ids``.
    """
    result = []
    q = {
      "_source": [name_field,id_field],
      "size": 100,
      "query": {
        "terms": {id_field : entity_ids}
      }
    }
    s = Search(using=client, index=index)
    # update_from_dict mutates the Search in place.
    s.update_from_dict(q)
    response = s.execute()
    data = response.to_dict()["hits"]["hits"]
    id_name_dict = {res["_source"][id_field]: res["_source"][name_field] for res in data}
    if with_id:
        return id_name_dict
    # NOTE(review): raises KeyError if any requested id is missing from the
    # index (or falls beyond the 100-hit cap) — confirm that is acceptable.
    ids = [id_name_dict[eid] for eid in entity_ids]

    return ids
Ejemplo n.º 34
0
def search_graphs1(request, owner_email=None, names=None, nodes=None, edges=None, tags=None, member_email=None,
                   is_public=None, query=None, limit=20, offset=0, order='desc', sort='name'):
	"""
	Search graphs matching the given filters, optionally restricting to ids
	returned by an Elasticsearch query body.

	Returns:
		tuple: (total, graphs_list) from db.find_graphs.

	Raises:
		Exception: if member_email does not correspond to a user.
	"""
	sort_attr = getattr(db.Graph, sort if sort is not None else 'name')
	order_by = getattr(db, order if order is not None else 'desc')(sort_attr)
	is_public = int(is_public) if is_public is not None else None

	if member_email is not None:
		member_user = users.controllers.get_user(request, member_email)
		if member_user is None:
			raise Exception("User with given member_email doesnt exist.")
		group_ids = [group.id for group in users.controllers.get_groups_by_member_id(request, member_user.id)]
	else:
		group_ids = None

	if edges is not None:
		edges = [tuple(edge.split(':')) for edge in edges]

	# Bug fix: query defaults to None and `'query' in None` raises TypeError;
	# guard against a missing/empty query dict.
	if query and 'query' in query:
		s = Search(using=settings.ELASTIC_CLIENT, index='graphs')
		s.update_from_dict(query)  # mutates the Search in place
		# Bug fix: Search.source() returns a modified copy; the original
		# discarded it, so the full _source was still being fetched.
		s = s.source(False)
		graph_ids = [int(hit.meta.id) for hit in s.scan()]
	else:
		graph_ids = None

	total, graphs_list = db.find_graphs(request.db_session,
	                                    owner_email=owner_email,
	                                    graph_ids=graph_ids,
	                                    is_public=is_public,
	                                    group_ids=group_ids,
	                                    names=names,
	                                    nodes=nodes,
	                                    edges=edges,
	                                    tags=tags,
	                                    limit=limit,
	                                    offset=offset,
	                                    order_by=order_by)

	return total, graphs_list
Ejemplo n.º 35
0
    def get_context_data(self, **kwargs):
        """
        Extend the base context with per-session view counting, a
        "suggested recipes" ES query, and the user's collection form.
        """
        context = super(RecipeDetailView, self).get_context_data(**kwargs)
        recipe = self.get_queryset().all()[0]
        # increment count (once per session per recipe)
        if not self.request.session.get('recipe_viewed_%s' % recipe.pk, None):
            recipe.increment_views()
            self.request.session['recipe_viewed_%s' % recipe.pk] = 1
        logger.error('recipe views %s', recipe.views)
        logger.error('recipe session %s', self.request.session.keys())
        course_info = recipe.courses.all()
        holiday_info = recipe.holidays.all()


        context['title'] = context['recipe'].name

        # Build the suggestion query: exclude the current recipe, boost
        # matches sharing its course (5x), holiday, or name (2x).
        exclude_clause = []
        match_clause = []

        exclude_clause.append(
            {"term": {"document_id": recipe.id}}
        )
        if course_info:
            course_id = course_info[0].id
            match_clause.append({'match': {'courses': {'query': course_id, 'boost': 5}}})
        if holiday_info:
            holiday_id = holiday_info[0].id
            match_clause.append({'match': {'holidays': holiday_id}})

        match_clause.append({'match': {'name': {'query': recipe.name, 'boost': 2}}})

        # Bug fix: the original built an identical Search twice; the first
        # assignment was dead code and has been removed.
        s = Search(using=es, index='recipe')
        s.update_from_dict({
            'query': {
                'function_score': {
                    'query': {
                        'bool': {
                            "must_not": exclude_clause,
                            'should': match_clause
                        }
                    },
                    # Fixed seed keeps suggestions stable across renders.
                    'random_score': {
                        'seed': 12371203
                    }
                }
            }
        })

        s = s.extra(size=6)
        results = s.execute()
        context['suggested_recipes'] = results

        if self.request.user.is_authenticated():
            user_collection = UserCollection.objects.filter(user=self.request.user)
            recipe_collection = RecipesCollection.objects\
                .filter(recipe_id=context['recipe'].id, collection__user=self.request.user)\
                .only('collection__id')
            user_recipe_collection = set(i.collection_id for i in recipe_collection.all())
            initial = {
                'recipes': self.kwargs.get("pk"),
                'recipe_collection': user_collection,
                'user_recipe_collection': user_recipe_collection
            }
            context['form'] = UserRecipeCollectionForm(
                initial=initial
            )
        context['searchform'] = SearchKeywordForm()
        context['current_recipe_name'] = recipe.name
        return context