Example #1
def query(filename):
    contents = file_contents(filename)

    try:
        Search.from_dict(json.loads(contents))
    except json.JSONDecodeError:
        raise ArgumentTypeError(f"invalid JSON in '{filename}'")
    except exceptions.ElasticsearchDslException as e:
        raise ArgumentTypeError(str(e))

    return contents
Example #2
def test_add_channel_channel_already_exists(mock_staff_client, patched_users_api):
    """Channel already exists with that channel name"""
    response_409 = Response()
    response_409.status_code = statuses.HTTP_409_CONFLICT
    mock_staff_client.channels.create.return_value = response_409

    title = "title"
    name = "name"
    description = "public description"
    channel_type = "private"
    input_search = Search.from_dict({"unmodified": "search"})
    role = RoleFactory.create()
    mod = UserFactory.create()

    with pytest.raises(ChannelAlreadyExistsException):
        api.add_channel(
            original_search=input_search,
            title=title,
            name=name,
            description=description,
            channel_type=channel_type,
            program_id=role.program.id,
            creator_id=mod.id,
        )

    mock_staff_client.channels.create.assert_called_once_with(
        title=title,
        name=name,
        description=description,
        channel_type=channel_type,
    )
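Example #3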
    def get_testrun_list_id_only(self, project_id, params=None):
        if params is None:
            params = {}
        index = project_id
        limit = params.get("limit", 1000)
        if not isinstance(limit, int):
            limit = int(limit)
        env = params.get("env", '')
        suite = params.get("suite", '')

        query_body = {"collapse": {"field": "testrun_id.keyword"}}
        search_obj = Search.from_dict(query_body)
        if env and env != "all":
            search_obj = search_obj.query("match_phrase", env=env)
        if suite and suite != "all":
            search_obj = search_obj.query("match_phrase", suite_name=suite)
        search_obj = search_obj.source(['testrun_id']).sort(
            {"testrun_id.keyword": {
                "order": "desc"
            }})
        data = self.common_search(search_obj=search_obj,
                                  index=index,
                                  limit=limit)
        d = list()
        for t in data:
            i = t.get('testrun_id')
            if i:
                d.append(i)
        return d
Example #4
def get_providers(index):
    """
    Given an index, find all available data providers.

    :param index: An Elasticsearch index, such as `'image'`.
    :return: A list of providers represented as strings. Example: `['met']`
    """
    provider_cache_name = 'providers-' + index
    cache_timeout = 60 * 5
    providers = cache.get(key=provider_cache_name)
    if not providers:
        elasticsearch_maxint = 2147483647
        agg_body = {
            'aggs': {
                'unique_providers': {
                    'terms': {
                        'field': 'provider.keyword',
                        'size': elasticsearch_maxint,
                        "order": {
                            "_key": "desc"
                        }
                    }
                }
            }
        }
        s = Search.from_dict(agg_body)
        # index() is a method returning a new Search; call it and reassign instead of overwriting it
        s = s.index(index)
        results = s.execute().aggregations['unique_providers']['buckets']
        providers = [result['key'] for result in results]
        cache.set(key=provider_cache_name,
                  timeout=cache_timeout,
                  value=providers)

    return providers
Example #5
 def get_maxts_index_pattern(self, indexpattern, timefield="@timestamp"):
     """Returns the timestamp of the record in ealsticsearch index pattern which is the newest"""
     body = {"aggs": {"max_timestamp": {"max": {"field": timefield}}}}
     s = Search.from_dict(body)
     s = s.index(indexpattern).using(self.es_conn)
     t = s.execute()
     return int(t.aggregations.max_timestamp.value)
Example #6
 def delete_by_search(self, search):
     search = Search.from_dict(search)
     # using() returns a new Search object, so the result must be reassigned
     search = search.using(self.es)
     response = search.delete()
     if response.success():
         return response.to_dict(), 'Success'
     else:
         return 'Search error', 'SearchError'
Example #7
def update_query(input_query: Dict, cql: CQLModel):
    s = Search.from_dict(input_query)
    query = ESQueryBuilder()
    output_query = _build_query(query, cql)
    s = s.query(output_query)

    LOGGER.debug('Enhanced query: {}'.format(json.dumps(s.to_dict())))
    return s.to_dict()
Example #8
def search():

    query = template('abstract_template', keyword=request.json['keyword'])
    search = Search.from_dict(json.loads(query))

    print(search.to_dict())

    return search.to_dict()
Example #9
async def esRun(params, index):
    #print('params,index',params,index)
    s = Search.from_dict(params).using(esconnection).index(index)
    #print('sss', s)
    try:
        result = s.execute(ignore_cache=True)
    except Exception as e:
        result = e
    finally:
        #print(result)
        return result
Example #10
def search_product(genders=None,
                   category_ids=None,
                   brand_ids=None,
                   seasons=None,
                   sizes=None,
                   sortby='relevance',
                   onsale=False,
                   freetext=None,
                   offset=0,
                   limit=60):
    filters = []
    if category_ids is not None and len(category_ids) > 0:
        filters.append({'terms': {'category_ids': category_ids}})
    if brand_ids is not None and len(brand_ids) > 0:
        filters.append({'terms': {'brand_id': brand_ids}})
    if genders is not None and len(genders) > 0:
        filters.append({'terms': {'gender': genders}})
    if seasons is not None and len(seasons) > 0:
        filters.append({'terms': {'season': seasons}})
    if sizes is not None and len(sizes) > 0:
        filters.append({'terms': {'sizes': sizes}})
    if onsale:
        filters.append({'range': {'discount_percentage': {'gt': 0}}})

    matches = []
    if freetext:
        matches.append({'match': {'model': {'query': freetext, 'boost': 2.0}}})
        matches.append({'match': {'brand': {'query': freetext, 'boost': 2.0}}})
        matches.append({'match': {'categories': freetext}})
        matches.append({'match': {'description': freetext}})

    sorts = []
    if sortby == 'price_high':
        sorts.append({'price_eur': {'order': 'desc'}})
    elif sortby == 'price_low':
        sorts.append({'price_eur': {'order': 'asc'}})
    elif sortby == 'sale':
        sorts.append({'discount_percentage': {'order': 'desc'}})
    elif sortby == 'newest':
        sorts.append({'insert_time': {'order': 'desc'}})
    sorts.append({'photo_quality': {'order': 'desc'}})
    sorts.append('_score')

    bool_query = {'filter': filters}
    if len(matches) > 0:
        bool_query['should'] = matches
        bool_query['minimum_should_match'] = 1

    query = {'query': {'bool': bool_query}, 'sort': sorts}
    print(query)
    s = Search.from_dict(query)
    for hit in s[offset:offset + limit].execute():
        print(hit)
Example #11
 def test_adjust_search_for_percolator(self, mock_on_commit):
     """adjust_search_for_percolator should move post_filter into the query itself and remove all other pieces"""
     original_query = {
         "query": {
             "multi_match": {
                 "query": "p",
                 "analyzer": "folding",
                 "type": "phrase_prefix",
                 "fields": ["profile.first_name.folded"]
             }
         },
         "post_filter": {
             "term": {
                 "program.id": 1
             }
         },
         "aggs": {
             "profile.work_history.company_name11": {
                 "filter": {
                     "term": {
                         "program.id": 1
                     }
                 }
             }
         },
         "size": 50,
         "sort": [{
             "profile.last_name": {
                 "order": "asc"
             }
         }]
     }
     search_obj = Search.from_dict(original_query)
     adjusted_search = adjust_search_for_percolator(search_obj)
     assert adjusted_search.to_dict() == {
         'query': {
             'bool': {
                 'filter': [{
                     'term': {
                         'program.id': 1
                     }
                 }],
                 'must': [{
                     'multi_match': {
                         'analyzer': 'folding',
                         'fields': ['profile.first_name.folded'],
                         'query': 'p',
                         'type': 'phrase_prefix'
                     }
                 }]
             }
         }
     }
Example #12
    def search(self):
        """Returns an unbounded search object based on the saved query. Call
        the execute method when ready to retrieve the results."""
        import json

        s = Search.from_dict(json.loads(self.query))\
            .using(DailyIndexDocType._doc_type.using)\
            .index(self.index_prefix)

        if self.doc_type is not None:
            s = s.doc_type(self.doc_type)

        return s
Example #13
    def test_add_automatic_email(self):
        """Add an AutomaticEmail entry with associated PercolateQuery"""
        assert AutomaticEmail.objects.count() == 3
        search_obj = Search.from_dict({"query": {"match": {}}})

        new_automatic = add_automatic_email(search_obj, 'subject', 'body', 'sender', self.staff_user)
        assert AutomaticEmail.objects.count() == 4
        assert new_automatic.sender_name == 'sender'
        assert new_automatic.email_subject == 'subject'
        assert new_automatic.email_body == 'body'
        assert new_automatic.query.query == adjust_search_for_percolator(search_obj).to_dict()
        assert new_automatic.query.source_type == PercolateQuery.AUTOMATIC_EMAIL_TYPE
        assert new_automatic.staff_user == self.staff_user
Example #14
async def esRun(params,index):
    #print('params,index',params,index)
    s = Search.from_dict(params).using(esconnection).index(index)
    #print('sss', s)
    try:
        result =  s.execute(ignore_cache=True)
        #result =  s.execute()
    except:
        #print(e)
        raise
    else:
        #print('result',result)
        return result
Example #15
    def search(self):
        """Returns an unbounded search object based on the saved query. Call
        the execute method when ready to retrieve the results."""
        import json

        s = Search.from_dict(json.loads(self.query))\
            .using(DailyIndexDocType._doc_type.using)\
            .index(self.index_prefix)

        if self.doc_type is not None:
            s = s.doc_type(self.doc_type)

        return s
Example #16
def recommend_random(request):
    """Recommend two random jokes for inclusion on another page."""
    try:
        search = Search.from_dict({'query': {'function_score': {'query': {'match_all': {}}, 'random_score': {}}}})
        search = search[0:2]
        results = search.execute()
        joke_ids = [joke.meta.id for joke in results]
        if joke_ids and len(joke_ids) >= 2:
            jokes = request.dbsession.query(Image).filter(Image.id.in_(joke_ids))
            return {'jokes': jokes}
    except ConnectionError:
        pass
    raise HTTPNotFound()
Example #17
    def test__esp_add_query_dslquery(self):
        page = 1
        page_size = 20

        query = {
            "query": {
                "match": {
                    "_all": "foobar"
                }
            }
        }

        s = Search.from_dict(query.copy())

        # ElasticsearchProcessor internally sets the from/size parameters
        # on the query; we need to compare with those values included
        query_with_size = query.copy()
        query_with_size.update({
            'from': (page - 1) * page_size,
            'size': page_size
        })

        esp = ElasticsearchProcessor()
        esp.add_search(s)
        ddtools.assert_equal(esp.bulk_search_data[0], {})
        ddtools.assert_equal(esp.bulk_search_data[1], query_with_size)

        esp.reset()
        esp.add_search(s, index='blog')
        ddtools.assert_equal(esp.bulk_search_data[0], {'index': 'blog'})
        ddtools.assert_equal(esp.bulk_search_data[1], query_with_size)

        esp.reset()
        esp.add_search(s, index='blog', doc_type='posts')
        ddtools.assert_equal(esp.bulk_search_data[0], {'index': 'blog', 'type': 'posts'})
        ddtools.assert_equal(esp.bulk_search_data[1], query_with_size)

        s = s.index('blog').params(routing='id')

        esp.reset()
        esp.add_search(s)
        ddtools.assert_equal(esp.bulk_search_data[0], {'index': ['blog'], 'routing': 'id'})
        ddtools.assert_equal(esp.bulk_search_data[1], query_with_size)

        s = s.doc_type('posts')

        esp.reset()
        esp.add_search(s)
        ddtools.assert_equal(esp.bulk_search_data[0], {'index': ['blog'], 'type': ['posts'], 'routing': 'id'})
        ddtools.assert_equal(esp.bulk_search_data[1], query_with_size)
Example #18
 def search_test_result_field(self, project_id, key):
     query_body = {"collapse": {"field": f"{key}.keyword"}}
     search_obj = Search.from_dict(query_body)
     search_obj = search_obj.source([key]).sort(
         {f"{key}.keyword": {
             "order": "asc"
         }})
     data = self.common_search(search_obj=search_obj, index=project_id)
     print(data)
     d = list()
     for t in data:
         i = t.get(key)
         if i:
             d.append(i)
     return d
Example #19
 def on_dropbox_but_not_on_nas(self, limit: int = 0):
     s = Search.from_dict({
         "query": {
             "bool": {
                 "must_not": {"exists": {"field": "nas"}},
                 "filter": {"term": {"dropbox": "true"}}
             }
         }
     })
     s = s.using(self.elastic).index(self.index)
     if limit:
         s = s[:limit]
     result = s.execute()
     for e in result.hits:
         yield Factory.from_elastic_entry(e)
Example #20
    def test_add_automatic_email(self):
        """Add an AutomaticEmail entry with associated PercolateQuery"""
        assert AutomaticEmail.objects.count() == 3
        search_obj = Search.from_dict({"query": {"match": {}}})

        new_automatic = add_automatic_email(search_obj, 'subject', 'body',
                                            'sender', self.staff_user)
        assert AutomaticEmail.objects.count() == 4
        assert new_automatic.sender_name == 'sender'
        assert new_automatic.email_subject == 'subject'
        assert new_automatic.email_body == 'body'
        assert new_automatic.query.query == adjust_search_for_percolator(
            search_obj).to_dict()
        assert new_automatic.query.source_type == PercolateQuery.AUTOMATIC_EMAIL_TYPE
        assert new_automatic.staff_user == self.staff_user
Example #21
 def get_keywords(cls, keyword):
     q_dict = {
         'size': 0,
         'aggs': {
             'question': {
                 'terms': {
                     'field': '%s.raw' % keyword,
                     'size': 250
                 }
             }
         }
     }
     searcher = Search.from_dict(q_dict)
     answers = searcher.execute()
     retval = [keyword['key'] for keyword in answers.aggregations.question]
     return retval
Example #22
def get_product(query, limit, offset):
    query = {
        "query": {
            "multi_match": {
                "query": query,
                "fields": ["brand.name^5", "name"]
            }
        },
        "size": limit,
        "from": offset
    }
    # from_dict() is a classmethod that builds a fresh Search, so the index and
    # client have to be applied to the object it returns
    search = Search.from_dict(query).index('product').using(client)
    response = search.execute()
    total = response['hits']['total']
    data = [item['_source'].to_dict() for item in response['hits']['hits']]
    return total, data
Example #23
 def test_adjust_search_for_percolator(self, mock_on_commit):
     """adjust_search_for_percolator should move post_filter into the query itself and remove all other pieces"""
     original_query = {
         "query": {
             "multi_match": {
                 "query": "p",
                 "analyzer": "folding",
                 "type": "phrase_prefix",
                 "fields": ["profile.first_name.folded"]
             }
         },
         "post_filter": {"term": {"program.id": 1}},
         "aggs": {
             "profile.work_history.company_name11": {
                 "filter": {"term": {"program.id": 1}}
             }
         },
         "size": 50,
         "sort": [
             {
                 "profile.last_name": {"order": "asc"}
             }
         ]
     }
     search_obj = Search.from_dict(original_query)
     adjusted_search = adjust_search_for_percolator(search_obj)
     assert adjusted_search.to_dict() == {
         'query': {
             'bool': {
                 'filter': [
                     {'term': {'program.id': 1}}
                 ],
                 'must': [
                     {
                         'multi_match': {
                             'analyzer': 'folding',
                             'fields': ['profile.first_name.folded'],
                             'query': 'p',
                             'type': 'phrase_prefix'
                         }
                     }
                 ]
             }
         }
     }
Example #24
 def get_peak_of_index(self, index, timefield="@timestamp"):
     """Returns the peak of the index in elasticsearch by the no of records preserved in a second"""
     body = dict(
         aggs={
             "record": {
                 "date_histogram": {
                     "field": timefield,
                     "interval": "1s",
                     "order": {
                         "_count": "desc"
                     }
                 }
             }
         })
     s = Search.from_dict(body)
     s = s.index(index).using(self.es_conn)
     t = s.execute()
     return t.aggregations.record.buckets[0]['doc_count']
Example #25
 def matchall(self, index_name, size=5000):
     begin_index = -1
     while True:
         body = {
             "size": size,
             "query": {
                 "match_all": {}
             },
             "search_after": [begin_index],
             "sort": ['_doc']
         }
         s = Search.from_dict(body).using(self.client).index(index_name)
         response = s.execute()
         for hit in response:
             yield hit.message
         if len(response) == 0:
             break
         else:
             begin_index = response[-1].meta.sort[0]
Example #26
    def _get_parsed_data(self):
        # Error will be set to true if we encounter an error
        parsed_data = dict(raw=[], error=False, data=[])
        source = ElasticsearchSource.objects.get(name=self.source.name)
        multisearch = MultiSearch()

        if source.max_concurrent_searches is not None:
            multisearch.params(
                max_concurrent_searches=source.max_concurrent_searches)

        for query in json.loads(self.queries):
            multisearch = multisearch.add(
                Search.from_dict(query).params(ignore_unavailable=True,
                                               allow_no_indices=True))

        try:
            responses = multisearch.using(source.client).index(
                source.index).execute()

            for response in responses:
                raw_data = response.to_dict()
                parsed_data['raw'].append(raw_data)

                if raw_data['hits']['hits'] == []:
                    continue

                self._check_response_size(raw_data)

                data = self._parse_es_response([raw_data['aggregations']])
                if data == []:
                    continue

                parsed_data['data'].extend(data)

        except Exception as e:
            logger.exception(
                'Error executing Elasticsearch queries: {}'.format(
                    self.queries))
            parsed_data['error_code'] = type(e).__name__
            parsed_data['error_message'] = six.text_type(e)
            parsed_data['error'] = True

        return parsed_data
Example #27
    def get_langs_from_unlabeled_tweets(self, **kwargs):

        # TODO: we need to execute this in case the user doesn't have it enabled. I can't find the
        # PUT / twitterfdl2017 / _mapping / tweet
        # {
        #     "properties": {
        #         "lang": {
        #             "type": "text",
        #             "fielddata": true
        #         }
        #     }
        # }

        the_host = "http://" + kwargs["host"] + ":" + kwargs["port"]
        client = connections.create_connection(hosts=[the_host])
        s = Search(using=client, index=kwargs["index"], doc_type="tweet")

        body = {
            "size": 0,
            "aggs": {
                "distinct_lang": {
                    "terms": {
                        "field": "lang",
                        "size": 1000
                    }
                }
            }
        }

        s = Search.from_dict(body)
        s = s.index(kwargs["index"])
        s = s.doc_type("tweet")
        body = s.to_dict()

        t = s.execute()

        distinct_langs = []
        for item in t.aggregations.distinct_lang:
            # print(item.key, item.doc_count)
            distinct_langs.append(item.key)

        return distinct_langs
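Example #28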
def calculate_field_counts(request, es_client):
    '''
    Given a download request and an elasticsearch client to work with, work out the number of values
    available per field, per resource for the search.

    :param request: the DownloadRequest object
    :param es_client: the elasticsearch client to use
    :return: a dict of resource ids -> fields -> counts
    '''
    field_counts = defaultdict(dict)
    for resource_id, version in request.resource_ids_and_versions.items():
        index_name = prefix_resource(resource_id)
        # get the base field mapping for the index so that we know which fields to look up, this
        # will get all fields from all versions and therefore isn't usable straight off the bat, we
        # have to then go and see which fields are present in the search at this version
        mapping = es_client.indices.get_mapping(index_name)[index_name]

        # we're going to do a multisearch to find out the number of records with a value for each
        # field from the mapping
        search = MultiSearch(using=es_client, index=index_name)
        base_search = Search.from_dict(request.search) \
            .index(index_name) \
            .using(es_client) \
            .extra(size=0) \
            .filter(create_version_query(version))

        # get all the fields names and use dot notation for nested fields
        fields = [
            u'.'.join(parts) for parts, _config in iter_data_fields(mapping)
        ]
        for field in fields:
            # add a search which finds the documents that have a value for the given field at the
            # right version
            search = search.add(
                base_search.filter(u'exists', field=prefix_field(field)))

        responses = search.execute()
        for field, response in zip(fields, responses):
            field_counts[resource_id][field] = response.hits.total

    return field_counts
Example #29
    def generate_count(c, args):
        """
        It is pretty unbelievable that there is no good way
        to get the termfrequency score from a document for a certain term without
        parsing a bunch of ugly strings in "explanation"...
        """
        es = Elasticsearch([settings.ES_URL])
        if args.get('srl'):
            pass
        else:
            s = Search(using=es)
            # from_dict is a classmethod; it builds a new Search from the dict
            s = Search.from_dict({"explain": "true"})
            #term_statistics="true"
            s = s.query("nested", path="sentences",
                query=Q("match_phrase",
                        sentences__content=args.get("query"),
                ),
                inner_hits={}
            )

        return es.mtermvectors(index="corpus", body=s.to_dict(), term_statistics="true")
Example #30
def root(request):
    """Main landing page."""
    total_joke_count = request.dbsession.query(Image).filter(Image.type == 'joke').count()
    transcribed_joke_count = request.dbsession.query(Image).filter(and_(Image.type == 'joke',
                                                                        Image.status == 'final')).count()
    joke = None
    try:
        search = Search.from_dict({'query': {'function_score': {'query': {'match_all': {}}, 'random_score': {}}}})
        search = search.index(Joke.Index.name)
        search = search[0]
        results = search.execute()
        if len(results) == 1:
            joke = request.dbsession.query(Image).filter(and_(Image.id == results[0].meta.id,
                                                              Image.type == 'joke')).first()
    except ConnectionError:
        pass
    except NotFoundError:
        pass
    return {'total_joke_count': total_joke_count,
            'transcribed_joke_count': transcribed_joke_count,
            'joke': joke}
Example #31
    def get_summary_info(self, project_ids):
        query_body = {"track_total_hits": True}
        search_obj = Search.from_dict(query_body)
        search_obj = search_obj.query("terms", _index=project_ids)
        search_obj.aggs.bucket('project_count', 'cardinality', field='_index')
        search_obj.aggs.bucket('testrun_count',
                               'cardinality',
                               field='testrun_id.keyword')
        # print(search_obj.to_dict())
        es_data = self.common_search(search_obj=search_obj,
                                     index="*",
                                     limit=0,
                                     raw_result=True)
        print(es_data.aggregations)
        data = dict()
        data["total"] = es_data.hits.total.value
        data['project_count'] = len(
            project_ids)  # es_data.aggregations.project_count.value
        data['testrun_count'] = es_data.aggregations.testrun_count.value

        return data
Example #32
def test_add_channel_failed_create_channel(mock_staff_client, mocker):
    """If client.channels.create fails an exception should be raised"""
    response_500 = Response()
    response_500.status_code = statuses.HTTP_500_INTERNAL_SERVER_ERROR
    mock_staff_client.channels.create.return_value.raise_for_status.side_effect = HTTPError(response=response_500)

    with pytest.raises(ChannelCreationException) as ex:
        api.add_channel(
            Search.from_dict({}),
            "title",
            "name",
            "description",
            "channel_type",
            123,
            456,
        )
    assert ex.value.args[0] == "Error creating channel name"
    mock_staff_client.channels.create.return_value.raise_for_status.assert_called_with()
    assert mock_staff_client.channels.create.call_count == 1
    assert PercolateQuery.objects.count() == 0
    assert Channel.objects.count() == 0
Example #33
def get_providers(index):
    """
    Given an index, find all available data providers and return their counts.

    :param index: An Elasticsearch index, such as `'image'`.
    :return: A dictionary mapping providers to the count of their images.
    """
    provider_cache_name = 'providers-' + index
    providers = cache.get(key=provider_cache_name)
    if type(providers) == list:
        # Invalidate old provider format.
        cache.delete(key=provider_cache_name)
    if not providers:
        elasticsearch_maxint = 2147483647
        agg_body = {
            'aggs': {
                'unique_providers': {
                    'terms': {
                        'field': 'provider.keyword',
                        'size': elasticsearch_maxint,
                        "order": {
                            "_key": "desc"
                        }
                    }
                }
            }
        }
        s = Search.from_dict(agg_body)
        s = s.index(index)
        try:
            results = s.execute().aggregations['unique_providers']['buckets']
        except NotFoundError:
            results = [{'key': 'none_found', 'doc_count': 0}]
        providers = {result['key']: result['doc_count'] for result in results}
        cache.set(
            key=provider_cache_name,
            timeout=CACHE_TIMEOUT,
            value=providers
        )
    return providers
Example #34
    def doSearch(self, body):
        try:
            client = connections.create_connection(hosts=[settings.ES_URL])
            s = Search(using=client,
                       index=settings.ES_INDEX_NAME,
                       doc_type=settings.ES_INDEX_TYPE)
            s = Search.from_dict(body)
            s = s.index(settings.ES_INDEX_NAME)
            s = s.doc_type(settings.ES_INDEX_TYPE)

            # hightlight the following fields in the search result
            s = s.highlight('title')
            s = s.highlight('description')
            s = s.highlight('data_time')
            s = s.highlight('source')

            body = s.to_dict()
            response = s.execute()
        except Exception:
            return None

        return response
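Example #35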
# You don't need to port your entire application to get the benefits of the Python DSL; you can start by creating a Search object from an existing dict, modify it with the API, and then serialize it back into a dict:

# A complex query
body = {
        "query": {
            "match": {
                "query_question": {
                    "query": "恢复的效力你知道是什么意思啊",
                    "minimum_should_match": "30%"
                }
            }
        },
        "size": 20
    }

# Convert to a Search object
s = Search.from_dict(body)

# Add some filters, aggregations, queries, ...
# filter() returns a new Search, so reassign the result for it to take effect
s = s.filter("term", tags="python")

# Convert back to a dict to plug back into the existing code
body = s.to_dict()
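
# For completeness, a minimal sketch of feeding the serialized dict back into the
# low-level client; the host and index name below are illustrative assumptions,
# not part of the original example:
from elasticsearch import Elasticsearch

es = Elasticsearch(hosts=["http://localhost:9200"])
# the dict produced by to_dict() is an ordinary query body again
response = es.search(index="question_index", body=body)
print(response["hits"]["total"])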

def main():
    pass


if __name__ == '__main__':
    main()
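Example #36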
rows = c.execute(periodical_ids_query).fetchall()

#set up elastic connection
client = connections.create_connection(hosts=['http://localhost:9200'],
                                       timeout=60,
                                       max_retries=10,
                                       retry_on_timeout=True)
s = Search(using=client, index="documents", doc_type="article")

for i in rows:
    #CURL code here
    body = {
        "_source": ["Publication.Title", "Publication.Qualifier"],
        "query": {
            "match": {
                "Publication.PublicationID": i[1]
            }
        }
    }
    #get all articles by id
    # from_dict() is a classmethod and discards the client/index configured above,
    # so re-apply them to the Search it returns
    s = Search.from_dict(body).using(client).index("documents")
    t = s.execute()

    #get first result (all ids should have at least one, or else where did the id come from?)
    first_hit = t.to_dict()['hits']['hits'][0]

    qual = first_hit['_source']['Publication']['Qualifier']
    text_title = first_hit['_source']['Publication']['Title']
    insert_statement = "INSERT INTO periodical_meta (_id, periodical_id, aps_id, title, qualifier) VALUES (null, ?, ?, ?, ?)"
    c.execute(insert_statement, (i[0], i[1], text_title, qual))
    conn.commit()
Example #37
    def get(self, request):
        # Get the query parameters from the request
        key_words = request.GET.get('s', None)
        city = request.GET.get('city', None)
        lat = request.GET.get('lat', None)
        lon = request.GET.get('lon', None)
        price_range = request.GET.get('pricerange', None)
        page = request.GET.get('p', 1)
        try:
            page = int(page)
        except ValueError:
            page = 1
        query_dict = {
            'query': {
                'bool': {
                    'filter': [],
                    'must': []
                }
            },
            'from': 10 * (page - 1),
            'size': 10,
            '_source': [
                'name', 'address', 'city', 'state', 'postal_code', 'neighborhood', 'stars', 'review_count',
                'location', 'is_open', 'attribute.*', 'category'
            ],
            'highlight': {
                'fields': {
                    'name': {}
                },
                'pre_tags': '<span class="">',
                'post_tags': '</span>'
            }
        }

        if key_words:
            query_dict['query']['bool']['must'].append(
                dict(multi_match={
                    'query': key_words,
                    'fields': ['name^3', 'category'],
                    'boost': 2.0
                }))

        if city:
            city = str(city).replace('-', ' ')
            query_dict['query']['bool']['must'].append(
                dict(multi_match={
                    'query': city,
                    'fields': ['city^3', 'address'],
                    'boost': 1.0
                }))

        if price_range:
            query_dict['query']['bool']['filter'].append(
                dict(
                    nested={
                        'path': 'attribute',
                        'score_mode': 'avg',
                        'query': {
                            'bool': {
                                'must': [
                                    {'match': {'attribute.RestaurantsPriceRange2': price_range}}
                                ]
                            }
                        }
                    }
                )
            )

        try:
            if lat and lon:
                lat = float(lat)
                lon = float(lon)
                query_dict['query']['bool']['filter'].append(
                    dict(geo_distance={
                        'location': [lon, lat],
                        'distance': '20km'
                    }))
        except ValueError:
            return JsonResponse(CustomResponseJson('位置信息错误', code=0))
        start_time = datetime.now()
        try:
            s = Search.from_dict(query_dict)
            response = s.execute()
        except ConnectionError:
            return JsonResponse(CustomResponseJson(msg='搜索失败', code=0))

        end_time = datetime.now()
        last_time = (end_time - start_time).total_seconds()
        total_nums = response['hits']['total']
        if total_nums % 10 > 0:
            page_nums = int(total_nums / 10) + 1
        else:
            page_nums = int(total_nums / 10)

        # there is a next page only if the current page comes before the last page
        if page < page_nums:
            has_next = True
        else:
            has_next = False

        hit_list = response.hits.hits
        restaurant_list = list()
        data = dict(
            last_time=last_time, page_nums=page_nums,
            key_words=key_words, total_nums=total_nums,
            data=restaurant_list, has_next=has_next,
            city=city
        )
        for hit_dict in hit_list:
            restaurant_id = hit_dict.get('_id', None)
            cover_id = get_cover(restaurant_id)
            if cover_id:
                cover_url = PHOTO_STATIC_URL_FORMAT.format(str(cover_id))
            else:
                cover_url = 'http://58.87.109.246/static/rrsite/default-cover.jpg'
            restaurant_info = hit_dict.get('_source', None)
            restaurant_info['id'] = restaurant_id
            restaurant_info['cover_url'] = cover_url
            highlight = hit_dict.get('highlight', None)
            if highlight:
                name = highlight.get('name', None)
                if name is not None:
                    restaurant_info['name'] = name[0]
            restaurant_list.append(restaurant_info)
        return JsonResponse(CustomResponseJson(msg='搜索成功', code=1, data=data))
Example #38
def test_add_channel(settings, mock_staff_client, mocker, patched_users_api):
    """add_channel should tell open-discussions to create a channel"""
    mock_staff_client.channels.create.return_value.ok = True
    settings.FEATURES['OPEN_DISCUSSIONS_USER_UPDATE'] = True

    title = "title"
    name = "name"
    description = "description"
    channel_type = "private"
    input_search = Search.from_dict({"unmodified": "search"})
    modified_search = Search.from_dict({"result": "modified"})

    adjust_search_for_percolator_stub = mocker.patch(
        'discussions.api.adjust_search_for_percolator',
        autospec=True,
        return_value=modified_search,
    )

    program = ProgramFactory.create()
    contributors = [UserFactory.create() for _ in range(5)]
    for user in contributors:
        ProgramEnrollmentFactory.create(user=user, program=program)
    populate_memberships_task_stub = mocker.patch('search.api.populate_query_memberships', autospec=True)
    add_moderators_task_stub = mocker.patch('discussions.api.add_moderators_to_channel', autospec=True)
    add_subscriber_stub = mocker.patch('discussions.api.add_subscriber_to_channel', autospec=True)
    add_moderator_stub = mocker.patch('discussions.api.add_moderator_to_channel', autospec=True)

    mod = UserFactory.create()
    channel = api.add_channel(
        original_search=input_search,
        title=title,
        name=name,
        description=description,
        channel_type=channel_type,
        program_id=program.id,
        creator_id=mod.id,
    )

    mock_staff_client.channels.create.assert_called_once_with(
        title=title,
        name=name,
        description=description,
        channel_type=channel_type,
    )
    adjust_search_for_percolator_stub.assert_called_once_with(input_search)

    assert channel.name == name
    query = channel.query
    assert query.source_type == PercolateQuery.DISCUSSION_CHANNEL_TYPE
    assert query.original_query == input_search.to_dict()
    assert query.query == modified_search.to_dict()

    assert ChannelProgram.objects.count() == 1
    channel_program = ChannelProgram.objects.first()
    assert channel_program.program == program
    assert channel_program.channel == channel

    populate_memberships_task_stub.assert_called_once_with(query.id)
    add_moderators_task_stub.assert_called_once_with(channel.name)

    add_subscriber_stub.assert_called_once_with(channel.name, mod.discussion_user.username)
    add_moderator_stub.assert_called_once_with(channel.name, mod.discussion_user.username)
    _, updated_stub = patched_users_api
    updated_stub.assert_any_call(mod.discussion_user, allow_email_optin=False)