Exemple #1
 def create_index(self):
     """Create the Elasticsearch index named by ``self.index``.

     The index is created with ``settings.ES_SETTINGS`` as its settings and
     ``settings.ES_MAPPING`` as the mapping of the
     ``settings.ES_ARTICLE_DOCTYPE`` document type.
     """
     body = {
         "settings": settings.ES_SETTINGS,
         "mappings": {
             settings.ES_ARTICLE_DOCTYPE: settings.ES_MAPPING,
         },
     }
     indices.IndicesClient(self.es).create(self.index, body)
Exemple #2
def get_stemmed_form(idx, word):
    """Returns the stemmed form of a word for this index.

    The word is run through the ``_STEMMING_ANALYZER`` analyzer of the given
    index and the first token of the analysis result is returned.

    Parameters
    ----------
        idx : str
            The name of the elasticsearch index
        word : str
            The input word

    Returns
    -------
        The ``'token'`` value of the first entry in the ``'tokens'`` list of
        the analyze response.

    Raises
    ------
        IndexError
            Presumably when the analyzer yields no tokens for ``word`` (the
            first element of ``'tokens'`` is accessed unconditionally).
    """
    result = indices.IndicesClient(_es()).analyze(index=idx, text=word, analyzer=_STEMMING_ANALYZER)
    return result['tokens'][0]['token']
Exemple #3
 def check_index(self):
     """
     Check whether the server is up and the index exists.
     If the server is down, raise an exception.
     If the index does not exist, try to create it.
     Afterwards, ask the cluster for the health of the index, waiting for
     a status of at least 'yellow'.
     """
     if not self.es.ping():
         raise Exception("Elastic server cannot be reached")
     if not indices.IndicesClient(self.es).exists(self.index):
         log.info("Index {self.index} does not exist, creating".format(**locals()))
         # The log message and the contract above both promise creation, so
         # actually create the index instead of only announcing it.
         self.create_index()
     # The health response itself is not needed; the call is made for its
     # wait_for_status side effect only.
     cluster.ClusterClient(self.es).health(self.index, wait_for_status='yellow')
Exemple #4
    def create_index(self, shards=5, replicas=1):
        """Create the Elasticsearch index named by ``self.index``.

        :param shards: value stored under ``number_of_shards`` in the index
                       settings (default 5)
        :param replicas: value stored under ``number_of_replicas`` in the
                         index settings (default 1)

        The index settings start from a copy of ``settings.ES_SETTINGS`` and
        the mapping of the ``settings.ES_ARTICLE_DOCTYPE`` document type is
        ``settings.ES_MAPPING``.
        """
        # Work on a copy so the shard/replica overrides are not written into
        # the shared settings.ES_SETTINGS object.
        es_settings = settings.ES_SETTINGS.copy()
        es_settings.update({"number_of_shards": shards,
                            "number_of_replicas": replicas})

        body = {
            "settings": es_settings,
            "mappings": {
                settings.ES_ARTICLE_DOCTYPE: settings.ES_MAPPING,
            },
        }

        indices.IndicesClient(self.es).create(self.index, body)
Exemple #5
 def delete_index(self):
     # NOTE(review): the ``try:`` clause this handler belongs to is not
     # present in this excerpt; presumably it wrapped the call that deletes
     # the index -- confirm against the full source before relying on this.
     except Exception, e:
         # Return quietly when the error text mentions
         # 'IndexMissingException'.
         # NOTE(review): looks like deleting a non-existent index is meant to
         # be a no-op; what happens to any other exception is not visible
         # here -- confirm.
         if 'IndexMissingException' in unicode(e): return
Exemple #6
 def flush(self):
     # NOTE(review): the body of this method is not present in this excerpt;
     # what it flushes cannot be determined from here.
Exemple #7
def do_search(idx, typ, query, start, num, date_ranges, exclude_distributions,
              exclude_article_types, selected_pillars, return_source=False, sort_order='_score'):
    """Returns ElasticSearch search results.

    Fetch all documents matching the query and return a list of
    elasticsearch results.

    This method accepts boolean queries in the Elasticsearch query string
    syntax (see Elasticsearch reference).

    Parameters
    ----------
        idx : str
            The name of the elasticsearch index
        typ : str
            The type of document requested
        query : str
            A query string in the Elasticsearch query string language
        start : int
            An integer representing the index of the first result to be
            retrieved
        num : int
            The total number of results to be retrieved
        date_ranges : list(dict)
            A list of dictionaries containing the upper and lower dates of the
            requested date ranges
        exclude_distributions : list
            A list of strings representing distributions that should be
            excluded from the search
        exclude_article_types : list
            A list of strings representing article types that should be
            excluded from the search
        selected_pillars : list
            A list of strings representing pillars that should be included
            into the search. Each pillar is linked to a list of newspapers.
        return_source : boolean, optional
            A boolean indicating whether the _source of ES documents should be
            returned or a smaller selection of document fields. The smaller set
            of document fields (stored in _ES_RETURN_FIELDS) is the default
        sort_order: string, optional
            The sort order for this query. Syntax is fieldname:order, multiple
            sort orders can be separated by commas. Note that if the sort_order
            doesn't contain _score, no scores will be returned.

    Returns
    -------
        validity : boolean
            A boolean indicating whether the input query string is valid.
        results : list
            A list of elasticsearch results or a message explaining why the
            input query string is invalid.
    """
    q = create_query(query, date_ranges, exclude_distributions,
                     exclude_article_types, selected_pillars)

    # explain=True is required: the failure path below reads the
    # 'explanations' entry of the validation response, which Elasticsearch
    # only includes when an explanation is requested.
    valid_q = indices.IndicesClient(_es()).validate_query(index=idx,
                                                          doc_type=typ,
                                                          body=q,
                                                          explain=True)

    if valid_q.get('valid'):
        if return_source:
            # for each document return the _source field that contains all
            # document fields (no fields parameter in the ES call)
            return True, _es().search(index=idx, doc_type=typ, body=q,
                                      from_=start, size=num, sort=sort_order)
        else:
            # for each document return the fields listed in _ES_RETURN_FIELDS
            return True, _es().search(index=idx, doc_type=typ, body=q,
                                      fields=_ES_RETURN_FIELDS, from_=start,
                                      size=num, sort=sort_order)
    # Invalid query: report the error of the first explanation.
    return False, valid_q.get('explanations')[0].get('error')
Exemple #8
 def clear_cache(self):