Example #1
0
    def test_use_of_custom_http_clients(self):
        mocked_http_client = Mock()
        mocked_http_client.fetch = Mock()

        es_connection = ESConnection("localhost",
                                     "9200",
                                     self.io_loop,
                                     custom_client=mocked_http_client)

        es_connection.search(callback=self.stop,
                             source={"query": {"term": {"ID": "171171"}}},
                             type="materia", index="teste")

        mocked_http_client.fetch.assert_called()
Example #2
0
    def test_use_of_custom_http_clients(self):
        mocked_http_client = Mock()
        mocked_http_client.fetch = Mock()

        es_connection = ESConnection("localhost",
                                     "9200",
                                     self.io_loop,
                                     custom_client=mocked_http_client)

        es_connection.search(callback=self.stop,
                             source={"query": {"term": {"ID": "171171"}}},
                             type="materia", index="teste")

        mocked_http_client.fetch.assert_called()
Example #3
0
class TestESConnectionWithTornadoGen(AsyncTestCase):

    def setUp(self):
        super(TestESConnectionWithTornadoGen, self).setUp()
        self.es_connection = ESConnection("localhost", "9200", self.io_loop)

    def tearDown(self):
        if (not IOLoop.initialized() or self.io_loop is not IOLoop.instance()):
            self.io_loop.close(all_fds=True)
        super(AsyncTestCase, self).tearDown()

    @gen_test
    def test_simple_search(self):
        response = yield self.es_connection.get_by_path("/_search?q=_id:http\:\/\/localhost\/noticia\/2\/fast", self.stop)

        response = self._verify_status_code_and_return_response(response)

        self.assertEqual(response["hits"]["total"], 1)
        self.assertEqual(response["hits"]["hits"][0]["_id"], u'http://localhost/noticia/2/fast')

    @gen_test
    def test_search_for_specific_type_with_query(self):
        response = yield self.es_connection.search(
            source={"query": {"term": {"ID": "171171"}}},
            type="materia", index="teste"
        )

        response = self._verify_status_code_and_return_response(response)
        self.assertEqual(response["hits"]["total"], 1)
        self.assertEqual(response["hits"]["hits"][0]["_id"], u'171171')

    @gen_test
    def test_search_specific_index(self):
        response = yield self.es_connection.search(index="outroteste")
        response = self._verify_status_code_and_return_response(response)
        self.assertEqual(response["hits"]["total"], 14)

    @gen_test
    def test_search_apecific_type(self):
        response = yield self.es_connection.search(type='galeria')
        response = self._verify_status_code_and_return_response(response)
        self.assertEqual(response["hits"]["total"], 2)

    @gen_test
    def test_should_access_specific_document_using_tornado_gen(self):
        response = yield self.es_connection.get(index="teste", type="materia", uid="171171")
        response = response["_source"]
        self.assertEqual(response['Portal'], "G1")
        self.assertEqual(response['Macrotema'], "Noticias")

    @gen_test
    def test_should_make_two_searches(self):
        self._make_multisearch()
        response = yield self.es_connection.apply_search()
        response = self._verify_status_code_and_return_response(response)

        self.assertEqual(response['responses'][0]['hits']['hits'][0]['_id'], "171171")
        self.assertFalse("hits" in response['responses'][1])

    @gen_test
    def test_should_clean_search_list_after_search(self):
        self._make_multisearch()
        response = yield self.es_connection.apply_search()
        response = self._verify_status_code_and_return_response(response)

        self.assertListEqual([], self.es_connection.bulk.bulk_list)

    @gen_test
    def test_can_put_and_delete_document(self):
        try:
            doc_id = str(uuid4())

            response = yield self.es_connection.put("test", "document", doc_id, {
                "test": "document",
                "other": "property"
            }, parameters={'refresh': True})

            response_dict = self._verify_status_code_and_return_response(response)
            self.assertEqual(response_dict['_index'], 'test')
            self.assertEqual(response_dict['_type'], 'document')
            self.assertEqual(response_dict['_id'], doc_id)
            self.assertIn('refresh=True', response.request.url)
        finally:
            response = yield self.es_connection.delete("test", "document", doc_id,
                                                       parameters={'refresh': True})
            response = self._verify_status_code_and_return_response(response)

            self.assertTrue(response['found'])
            self.assertEqual(response['_index'], 'test')
            self.assertEqual(response['_type'], 'document')
            self.assertEqual(response['_id'], doc_id)

    @gen_test
    def test_count_specific_index(self):
        response = yield self.es_connection.count(index="outroteste")
        self.assertCount(response, 14)

    @gen_test
    def test_count_specific_type(self):
        response = yield self.es_connection.count(type='galeria')
        self.assertCount(response, 2)

    @gen_test
    def test_count_specific_query(self):
        source = {"query": {"term": {"_id": "171171"}}}
        response = yield self.es_connection.count(source=source)
        self.assertCount(response, 1)

    @gen_test
    def test_count_specific_query_with_parameters(self):
        source = {"query": {"term": {"_id": "171171"}}}
        parameters = {'refresh': True}
        response = yield self.es_connection.count(callback=self.stop, source=source, parameters=parameters)
        self.assertCount(response, 1)
        self.assertTrue(response.request.url.endswith('_count?refresh=True'))

    @gen_test
    def test_count_specific_query_with_many_parameters(self):
        source = {"query": {"term": {"_id": "171171"}}}
        parameters = {'df': '_id', 'test': True}
        response = yield self.es_connection.count(callback=self.stop, source=source, parameters=parameters)
        self.assertCount(response, 1)
        self.assertTrue('df=_id' in response.request.url)
        self.assertTrue('test=True' in response.request.url)

    def assertCount(self, response, count):
        response_dict = self._verify_status_code_and_return_response(response)
        self.assertEqual(response_dict["count"], count)

    def _make_multisearch(self):
        source = {"query": {"term": {"_id": "171171"}}}
        self.es_connection.multi_search(index="teste", source=source)
        source = {"query": {"term": {"_id": "101010"}}}
        self.es_connection.multi_search(index="neverEndIndex", source=source)

    def _verify_status_code_and_return_response(self, response):
        self.assertTrue(response.code in [200, 201], "Wrong response code: %d." % response.code)
        response = escape.json_decode(response.body)
        return response
Example #4
0
class ElasticSearchProvider(SearchProvider):
    def __init__(self, config, db=None, authnz_wrapper=None, io_loop=None):
        self.debug = False
        self.config = config
        if db is not None:
            self.db = db
        self.syncES = ElasticSearch(
            '%(ELASTIC_SEARCH_PROTOCOL)s://%(ELASTIC_SEARCH_HOST)s:%(ELASTIC_SEARCH_PORT)s' % config
        )
        self.asyncES = ESConnection(
            host=config.get('ELASTIC_SEARCH_HOST'),
            port=config.get('ELASTIC_SEARCH_PORT'),
            io_loop=io_loop,
            protocol=config.get('ELASTIC_SEARCH_PROTOCOL'),
        )
        self.index = config.get('ELASTIC_SEARCH_INDEX')
        self.max_retries = config.get('ELASTIC_SEARCH_MAX_RETRIES')

    def activate_debug(self):
        self.debug = True

    def connect_to_db(self):
        from sqlalchemy import create_engine
        from sqlalchemy.orm import scoped_session, sessionmaker
        conn_string = self.config.get('SQLALCHEMY_CONNECTION_STRING')
        engine = create_engine(
            conn_string,
            convert_unicode=True,
            pool_size=1,
            max_overflow=0,
            echo=self.debug
        )
        maker = sessionmaker(bind=engine, autoflush=True)
        self.db = scoped_session(maker)

    def _assemble_inner_query(self, domain=None, page_filter=None):
        if page_filter and domain:
            page_prefix = '%s/%s' % (domain.url, page_filter)
        else:
            page_prefix = None

        if page_prefix:
            return {
                'prefix': {
                    'page_url': page_prefix
                }
            }
        else:
            return {
                'match_all': {}
            }

    def _assemble_outer_query(self, inner_query, filter_terms):
        return {
            'filtered': {
                'query': inner_query,
                'filter': {
                    'and': [{
                        'term': filter_term
                    } for filter_term in filter_terms]
                }
            }
        }

    def _assemble_filter_terms(self, key_id=None, domain=None):
        filter_terms = []

        if key_id:
            filter_terms.append({'keys.id': key_id})

        if domain:
            filter_terms.append({'domain_id': domain.id})

        return filter_terms

    def gen_doc(self, review):
        return {
            'keys': [{'id': violation.key_id} for violation in review.violations],
            'uuid': str(review.uuid),
            'completed_date': review.completed_date,
            'violation_count': review.violation_count,
            'page_id': review.page_id,
            'page_uuid': str(review.page.uuid),
            'page_url': review.page.url,
            'page_last_review_date': review.page.last_review_date,
            'domain_id': review.domain_id,
            'domain_name': review.domain.name,
        }

    def index_review(self, review):
        for attempt in range(self.max_retries):
            try:
                self.syncES.send_request(
                    method='POST',
                    path_components=[self.index, 'review', review.page_id],
                    body=dumps(self.gen_doc(review)),
                    encode_body=False
                )
                break
            except (Timeout, ConnectionError, ElasticHttpError, InvalidJsonResponseError) as e:
                values = review.id, review.page_id, str(e)
                logging.error('Could not index review (review_id:{0}, page_id:{1}): {2}'.format(*values))
                time.sleep(1)
                if attempt >= self.max_retries - 1:
                    raise
            else:
                raise

    def index_reviews(self, reviewed_pages, reviews_count, batch_size):
        action = {'index': {'_type': 'review'}}

        for i in range(0, reviews_count, batch_size):
            body_bits = []

            for page in reviewed_pages[i:i + batch_size]:
                doc = self.gen_doc(page.last_review)

                action['index']['_id'] = doc['page_id']

                body_bits.append(dumps(action))
                body_bits.append(dumps(doc))

            # Yes, that trailing newline IS necessary
            body = '\n'.join(body_bits) + '\n'

            self.syncES.send_request(
                method='POST',
                path_components=[self.index, '_bulk'],
                body=body,
                encode_body=False
            )

        logging.info('Done!')

    @return_future
    def get_by_violation_key_name(self, key_id, current_page=1, page_size=10, domain=None, page_filter=None, callback=None):
        def treat_response(response):
            if response.error is None:
                try:
                    hits = loads(response.body).get('hits', {'hits': []})

                    reviews_data = []
                    for hit in hits['hits']:
                        completedAt = datetime.utcfromtimestamp(hit['_source']['completed_date'])
                        reviews_data.append({
                            'uuid': hit['_source']['uuid'],
                            'page': {
                                'uuid': hit['_source']['page_uuid'],
                                'url': hit['_source']['page_url'],
                                'completedAt': completedAt
                            },
                            'domain': hit['_source']['domain_name']
                        })

                    reviews_count = hits.get('total', 0)

                    callback({
                        'reviews': reviews_data,
                        'reviewsCount': reviews_count
                    })
                except Exception as e:
                    reason = 'ElasticSearchProvider: invalid response (%s [%s])' % (type(e), e.message)
                    logging.error(reason)
                    callback({'error': {'status_code': 500, 'reason': reason}})
            else:
                reason = 'ElasticSearchProvider: erroneous response (%s [%s])' % (response.error.message, response.body)
                logging.error(reason)
                callback({'error': {'status_code': 500, 'reason': reason}})

        inner_query = self._assemble_inner_query(domain, page_filter)
        filter_terms = self._assemble_filter_terms(key_id, domain)

        query = self._assemble_outer_query(inner_query, filter_terms)

        sort_ = [{
            'completed_date': {
                'order': 'desc'
            }
        }, {
            'violation_count': {
                'order': 'desc'
            }
        }]

        source = {'query': query, 'sort': sort_}

        self.asyncES.search(
            callback=treat_response,
            index=self.index,
            type='review',
            source=source,
            page=current_page,
            size=page_size,
        )

    @return_future
    def get_domain_active_reviews(self, domain, current_page=1, page_size=10, page_filter=None, callback=None):
        def treat_response(response):
            if response.error is None:
                try:
                    hits = loads(response.body).get('hits', {'hits': []})

                    pages = []
                    for hit in hits['hits']:
                        completedAt = datetime.utcfromtimestamp(hit['_source']['completed_date'])
                        pages.append({
                            'url': hit['_source']['page_url'],
                            'uuid': hit['_source']['page_uuid'],
                            'violationCount': len(hit['_source']['keys']),
                            'completedAt': completedAt,
                            'reviewId': hit['_source']['uuid']
                        })

                    reviews_count = hits.get('total', 0)

                    callback({
                        'reviewsCount': reviews_count,
                        'pages': pages
                    })
                except Exception as e:
                    reason = 'ElasticSearchProvider: invalid response (%s [%s])' % (type(e), e.message)
                    logging.error(reason)
                    callback({'error': {'status_code': 500, 'reason': reason}})
            else:
                reason = 'ElasticSearchProvider: erroneous response (%s [%s])' % (response.error.message, response.body)
                logging.error(reason)
                callback({'error': {'status_code': 500, 'reason': reason}})

        inner_query = self._assemble_inner_query(domain=domain, page_filter=page_filter)
        filter_terms = self._assemble_filter_terms(domain=domain)

        query = self._assemble_outer_query(inner_query, filter_terms)

        sort_ = [{
            'violation_count': {
                'order': 'desc'
            }
        }, {
            'completed_date': {
                'order': 'desc'
            }
        }]

        source = {'query': query, 'sort': sort_}

        self.asyncES.search(
            callback=treat_response,
            index=self.index,
            type='review',
            source=source,
            page=current_page,
            size=page_size,
        )

    def refresh(self):
        try:
            self.syncES.refresh(index=self.index)
        except Exception as e:
            logging.error('Could not refresh index (%s)' % e)

    def get_index_settings(cls):
        return {
            'index': {
                'number_of_shards': 4
            }
        }

    def get_index_mapping(cls):
        return {
            'review': {
                'properties': {
                    'keys': {
                        'properties': {
                            'id': {
                                'type': 'integer'
                            }
                        }
                    },
                    'uuid': {
                        'type': 'string',
                        'index': 'not_analyzed'
                    },
                    'completed_date': {
                        'type': 'integer'
                    },
                    'violation_count': {
                        'type': 'float'
                    },
                    'page_id': {
                        'type': 'integer'
                    },
                    'page_uuid': {
                        'type': 'string',
                        'index': 'not_analyzed'
                    },
                    'page_url': {
                        'type': 'string',
                        'index': 'not_analyzed'
                    },
                    'page_last_review_date': {
                        'type': 'integer'
                    },
                    'domain_id': {
                        'type': 'integer'
                    },
                    'domain_name': {
                        'type': 'string',
                        'index': 'not_analyzed'
                    }
                }
            }
        }

    def setup_index(self):
        try:
            settings = self.get_index_settings()
            self.syncES.create_index(index=self.index, settings=settings)
            mapping = self.get_index_mapping()
            self.syncES.put_mapping(index=self.index, doc_type='review', mapping=mapping)
            logging.info('Index %s created.' % self.index)
        except Exception as e:
            raise e

    def delete_index(self):
        try:
            self.syncES.delete_index(index=self.index)
            logging.info('Index %s deleted.' % self.index)
        except Exception as e:
            raise e

    def _get_max_page_id_from_index(self, must_have_domain_name=False):
        if must_have_domain_name:
            inner_query = {
                'constant_score': {
                    'filter': {
                        'not': {
                            'missing': {
                                'field': 'domain_name'
                            }
                        }
                    }
                }
            }
        else:
            inner_query = {
                'match_all': {}
            }

        query = {
            'query': inner_query,
            'sort': [{
                'page_id': {
                    'order': 'desc'
                }
            }]
        }

        results = self.syncES.search(query, index=self.index, doc_type='review')
        if results['hits']['total'] > 0:
            return results['hits']['hits'][0]['_id'] or 0
        return 0

    def index_all_reviews(self, keys=None, batch_size=200, replace=False):
        logging.info('Querying database...')
        self.connect_to_db()

        if keys is not None:
            keys = [k.id for k in self.db.query(Key.id).filter(Key.name.in_(keys)).all()]

        try:
            max_page_id = self._get_max_page_id_from_index(must_have_domain_name=True)
        except Exception:
            logging.error('Could not retrieve max page_id! Use with --replace (with caution)')
            return

        def apply_filters(query):
            if keys is not None:
                query = query \
                    .filter(Violation.review_id == Page.last_review_id) \
                    .filter(Violation.key_id.in_(keys))

            if not replace:
                query = query.filter(Page.id > max_page_id)

            return query.filter(Page.last_review_id != None)

        reviews_count = apply_filters(self.db.query(func.count(Page))).scalar()

        query = self.db.query(Page).options(joinedload('last_review'))
        reviewed_pages = apply_filters(query).order_by(Page.id.asc())

        logging.info('Indexing %d reviews...' % reviews_count)

        self.index_reviews(reviewed_pages, reviews_count, batch_size)

    @classmethod
    def new_instance(cls, config):
        return ElasticSearchProvider(config)

    @classmethod
    def main(cls):
        import sys

        parser = cls.argparser()
        args = parser.parse_args()

        config = {}
        host = None
        port = None
        index = None
        es = None

        levels = ['ERROR', 'WARNING', 'INFO', 'DEBUG']
        log_level = levels[args.verbose]
        logging.basicConfig(level=getattr(logging, log_level), format='%(levelname)s - %(message)s')

        if not (args.create or args.recreate or args.delete or args.keys or args.all_keys):
            parser.print_help()
            sys.exit(1)

        if args.conf:
            from derpconf.config import ConfigurationError
            from holmes.config import Config
            try:
                config = Config().load(args.conf[0])
                host = config['ELASTIC_SEARCH_HOST']
                port = config['ELASTIC_SEARCH_PORT']
                index = config['ELASTIC_SEARCH_INDEX']
            except ConfigurationError:
                logging.error('Could not load config! Use --conf conf_file')
                sys.exit(1)
            except KeyError:
                logging.error('Could not parse config! Check it\'s contents')
                sys.exit(1)

        if args.server:
            try:
                host, port = args.server[0].split(':')
                config['ELASTIC_SEARCH_HOST'] = host
                config['ELASTIC_SEARCH_PORT'] = port
            except Exception:
                logging.error('Could not parse server host and port! Use --server host:port')
                sys.exit(1)

        if args.index:
                index = args.index[0]
                config['ELASTIC_SEARCH_INDEX'] = index

        from pyelasticsearch.exceptions import IndexAlreadyExistsError, ElasticHttpNotFoundError, InvalidJsonResponseError
        from requests.exceptions import ConnectionError
        try:

            if args.create or args.recreate or args.delete:
                if host is None or port is None:
                    logging.error('Need either a host and port or a config file to perform such operation!')
                    sys.exit(1)
                if index is None:
                    logging.error('Need either an index name or a config file to perform such operation!')
                    sys.exit(1)
                else:
                    es = cls.new_instance(config)
                    if args.recreate or args.delete:
                        try:
                            es.delete_index()
                        except ElasticHttpNotFoundError:
                            pass
                        except InvalidJsonResponseError as e:
                            logging.error('Invalid response! Reason: %s' % e)
                            sys.exit(1)
                    if args.create or args.recreate:
                        es.setup_index()

            if args.keys or args.all_keys:
                if config is None:
                    logging.error('Need a config file to perform such operation! Use --conf conf_file')
                else:
                    batch_size = args.batch_size[0] if args.batch_size else 200
                    es = cls.new_instance(config) if not es else es
                    try:
                        if args.verbose > 2:
                            es.activate_debug()
                        if args.keys:
                            es.index_all_reviews(args.keys, replace=args.replace, batch_size=batch_size)
                        elif args.all_keys:
                            es.index_all_reviews(replace=args.replace, batch_size=batch_size)
                    except InvalidJsonResponseError as e:
                        logging.error('Invalid response! Reason: %s' % e)
                        sys.exit(1)

        except IndexAlreadyExistsError:
            logging.error('Index %s already exists! Use --recreate (with caution) to recreate' % index)
        except ConnectionError:
            logging.error('Could not connect to server at %s:%s' % (host, port))
        except KeyError:
            logging.error('Could not get host nor port! Use either -conf or --server')
            sys.exit(1)
Example #5
0
class ES:
    url = None
    index = None
    es = None

    def __init__(self, index=settings.DB_NAME):
        self.es = ESConnection()
        self.es.httprequest_kwargs['headers'] = {'Connection': 'keep-alive'}
        self.index = index

    @gen.coroutine
    def search(self, mapping, query, extra_params):
        fields = settings.RELAY_ES_FIELDS
        return_data = list()

        if query:
            query = " AND ".join(query.split())
        else:
            query = "*"

        time_q = extra_params['time']
        query += self.__parse_extra(extra_params)

        if mapping == "bridges":
            fields = settings.BRIDGE_ES_FIELDS

        body_query = {"_source": fields,
                      "query": {
                          "filtered": {
                              "query": {
                                  "query_string": {
                                      "query": query
                                  }
                              },
                              "filter":{
                                  "range": {
                                      time_q['type']: {
                                          "gte": time_q['range'][0] + " 00:00:00",
                                          "lte": time_q['range'][1] + " 23:59:59"
                                      }
                                  }
                              }
                          }
                      }}

        if settings.DEBUG:
            print body_query

        result = yield self.es.search(index=self.index, type=mapping, source=body_query, size=settings.ES_RESULT_SIZE)
        result = escape.json_decode(result.body)

        if 'hits' in result and 'hits' in result['hits']:
            for hit in result['hits']['hits']:
                return_data.append(hit['_source'])
        else:
            raise Exception(result['error'])

        raise gen.Return(return_data)

    def __parse_extra(self, extra):
        query = ""

        if 'country' in extra:
            new_country = list()
            geo_split = extra['country'].split(',')
            for geo in geo_split:
                new_country.append("geo:" + geo)

            query = "{0} AND (".format(query) + " OR ".join(new_country) + ")"

        if 'flags' in extra:
            flags_split = extra['flags'].split(',')
            for flag in flags_split:
                query = "{0} AND flags:{1}".format(query, flag.title())

        return query
Example #6
0
class TestESConnection(AsyncTestCase):

    def setUp(self):
        self.io_loop = self.get_new_ioloop()
        self.es_connection = ESConnection("localhost", "9200", self.io_loop)

    def tearDown(self):
        if (not IOLoop.initialized() or self.io_loop is not IOLoop.instance()):
            self.io_loop.close(all_fds=True)
        super(AsyncTestCase, self).tearDown()

    def test_simple_search(self):
        self.es_connection.get_by_path("/_search?q=_id:http\:\/\/localhost\/noticia\/2\/fast", self.stop)
        response = self._verify_status_code_and_return_response()
        self.assertEqual(response["hits"]["total"], 1)
        self.assertEqual(response["hits"]["hits"][0]["_id"], u'http://localhost/noticia/2/fast')

    def test_search_for_specific_type_with_query(self):
        self.es_connection.search(callback=self.stop,
                                  source={"query": {"text": {"ID": "171171"}}},
                                  type="materia", index="teste")

        response = self._verify_status_code_and_return_response()
        self.assertEqual(response["hits"]["total"], 1)
        self.assertEqual(response["hits"]["hits"][0]["_id"], u'171171')

    def test_search_all_entries(self):
        self.es_connection.search(self.stop)
        response = self._verify_status_code_and_return_response()
        self.assertEqual(response["hits"]["total"], 28)

    def test_search_specific_index(self):
        self.es_connection.search(callback=self.stop, index="outroteste")
        response = self._verify_status_code_and_return_response()
        self.assertEqual(response["hits"]["total"], 14)

    def test_search_apecific_type(self):
        self.es_connection.search(self.stop, type='galeria')
        response = self._verify_status_code_and_return_response()
        self.assertEqual(response["hits"]["total"], 2)

    def test_should_access_specific_documento(self):
        self.es_connection.get(index="teste", type="materia", uid="171171", callback=self.stop)
        response = self.wait()
        self.assertEqual(response['Portal'], "G1")
        self.assertEqual(response['Macrotema'], "Noticias")

    def test_should_accumulate_searches_before_search(self):
        source = {"query": {"text": {"_id": "171171"}}}
        self.es_connection.multi_search("teste", source=source)
        source = {"query": {"text": {"body": "multisearch"}}}
        self.es_connection.multi_search("neverEndIndex", source=source)

        self.assertListEqual(['{"index": "teste"}\n{"query": {"text": {"_id": "171171"}}}',
                              '{"index": "neverEndIndex"}\n{"query": {"text": {"body": "multisearch"}}}'
                              ], self.es_connection.bulk.bulk_list)

    def test_should_generate_empty_header_with_no_index_specified(self):
        source = {"query": {"text": {"_id": "171171"}}}
        self.es_connection.multi_search(index=None, source=source)
        source = {"query": {"text": {"body": "multisearch"}}}
        self.es_connection.multi_search(index=None, source=source)

        self.assertListEqual(['{}\n{"query": {"text": {"_id": "171171"}}}',
                              '{}\n{"query": {"text": {"body": "multisearch"}}}'
                              ], self.es_connection.bulk.bulk_list)

    def test_should_make_two_searches(self):
        self._make_multisearch()
        response = self._verify_status_code_and_return_response()
        self.assertEqual(response['responses'][0]['hits']['hits'][0]['_id'], "171171")
        self.assertFalse("hits" in response['responses'][1])

    def test_should_clean_search_list_after_search(self):
        self._make_multisearch()
        self.wait()
        self.assertListEqual([], self.es_connection.bulk.bulk_list)

    def _make_multisearch(self):
        source = {"query": {"text": {"_id": "171171"}}}
        self.es_connection.multi_search(index="teste", source=source)
        source = {"query": {"text": {"_id": "101010"}}}
        self.es_connection.multi_search(index="neverEndIndex", source=source)

        self.es_connection.apply_search(callback=self.stop)

    def _verify_status_code_and_return_response(self):
        response = self.wait()
        self.assertEqual(response.code, 200, "Wrong response code.")
        response = escape.json_decode(response.body)
        return response
Example #7
0
class TestESConnectionWithTornadoGen(AsyncTestCase):
    def setUp(self):
        self.io_loop = self.get_new_ioloop()
        self.es_connection = ESConnection("localhost", "9200", self.io_loop)

    def tearDown(self):
        if (not IOLoop.initialized() or self.io_loop is not IOLoop.instance()):
            self.io_loop.close(all_fds=True)
        super(AsyncTestCase, self).tearDown()

    @gen_test
    def test_simple_search(self):
        response = yield self.es_connection.get_by_path(
            "/_search?q=_id:http\:\/\/localhost\/noticia\/2\/fast", self.stop)

        response = self._verify_status_code_and_return_response(response)

        self.assertEqual(response["hits"]["total"], 1)
        self.assertEqual(response["hits"]["hits"][0]["_id"],
                         u'http://localhost/noticia/2/fast')

    @gen_test
    def test_search_for_specific_type_with_query(self):
        response = yield self.es_connection.search(
            source={"query": {
                "term": {
                    "ID": "171171"
                }
            }},
            type="materia",
            index="teste")

        response = self._verify_status_code_and_return_response(response)
        self.assertEqual(response["hits"]["total"], 1)
        self.assertEqual(response["hits"]["hits"][0]["_id"], u'171171')

    @gen_test
    def test_search_specific_index(self):
        response = yield self.es_connection.search(index="outroteste")
        response = self._verify_status_code_and_return_response(response)
        self.assertEqual(response["hits"]["total"], 14)

    @gen_test
    def test_search_apecific_type(self):
        response = yield self.es_connection.search(type='galeria')
        response = self._verify_status_code_and_return_response(response)
        self.assertEqual(response["hits"]["total"], 2)

    @gen_test
    def test_should_access_specific_documento(self):
        response = yield self.es_connection.get(index="teste",
                                                type="materia",
                                                uid="171171")
        self.assertEqual(response['Portal'], "G1")
        self.assertEqual(response['Macrotema'], "Noticias")

    @gen_test
    def test_should_make_two_searches(self):
        self._make_multisearch()
        response = yield self.es_connection.apply_search()
        response = self._verify_status_code_and_return_response(response)

        self.assertEqual(response['responses'][0]['hits']['hits'][0]['_id'],
                         "171171")
        self.assertFalse("hits" in response['responses'][1])

    @gen_test
    def test_should_clean_search_list_after_search(self):
        self._make_multisearch()
        response = yield self.es_connection.apply_search()
        response = self._verify_status_code_and_return_response(response)

        self.assertListEqual([], self.es_connection.bulk.bulk_list)

    @gen_test
    def test_can_put_and_delete_document(self):
        try:
            doc_id = str(uuid4())

            response = yield self.es_connection.put(
                "test",
                "document",
                doc_id, {
                    "test": "document",
                    "other": "property"
                },
                parameters={'refresh': True})

            response_dict = self._verify_status_code_and_return_response(
                response)
            self.assertEqual(response_dict['_index'], 'test')
            self.assertEqual(response_dict['_type'], 'document')
            self.assertEqual(response_dict['_id'], doc_id)
            self.assertIn('refresh=True', response.request.url)
        finally:
            response = yield self.es_connection.delete(
                "test", "document", doc_id, parameters={'refresh': True})
            response = self._verify_status_code_and_return_response(response)

            self.assertTrue(response['found'])
            self.assertEqual(response['_index'], 'test')
            self.assertEqual(response['_type'], 'document')
            self.assertEqual(response['_id'], doc_id)

    @gen_test
    def test_count_specific_index(self):
        response = yield self.es_connection.count(index="outroteste")
        self.assertCount(response, 14)

    @gen_test
    def test_count_specific_type(self):
        response = yield self.es_connection.count(type='galeria')
        self.assertCount(response, 2)

    @gen_test
    def test_count_specific_query(self):
        source = {"query": {"term": {"_id": "171171"}}}
        response = yield self.es_connection.count(source=source)
        self.assertCount(response, 1)

    @gen_test
    def test_count_specific_query_with_parameters(self):
        source = {"query": {"term": {"_id": "171171"}}}
        parameters = {'refresh': True}
        response = yield self.es_connection.count(callback=self.stop,
                                                  source=source,
                                                  parameters=parameters)
        self.assertCount(response, 1)
        self.assertTrue(response.request.url.endswith('_count?refresh=True'))

    @gen_test
    def test_count_specific_query_with_many_parameters(self):
        source = {"query": {"term": {"_id": "171171"}}}
        parameters = {'df': '_id', 'test': True}
        response = yield self.es_connection.count(callback=self.stop,
                                                  source=source,
                                                  parameters=parameters)
        self.assertCount(response, 1)
        self.assertTrue(
            response.request.url.endswith('_count?df=_id&test=True'))

    def assertCount(self, response, count):
        response_dict = self._verify_status_code_and_return_response(response)
        self.assertEqual(response_dict["count"], count)

    def _make_multisearch(self):
        source = {"query": {"term": {"_id": "171171"}}}
        self.es_connection.multi_search(index="teste", source=source)
        source = {"query": {"term": {"_id": "101010"}}}
        self.es_connection.multi_search(index="neverEndIndex", source=source)

    def _verify_status_code_and_return_response(self, response):
        self.assertTrue(response.code in [200, 201],
                        "Wrong response code: %d." % response.code)
        response = escape.json_decode(response.body)
        return response
Example #8
0
class TestESConnection(AsyncTestCase):

    def setUp(self):
        self.io_loop = self.get_new_ioloop()
        self.es_connection = ESConnection("localhost", "9200", self.io_loop)

    def tearDown(self):
        if (not IOLoop.initialized() or self.io_loop is not IOLoop.instance()):
            self.io_loop.close(all_fds=True)
        super(AsyncTestCase, self).tearDown()

    def test_simple_search(self):
        self.es_connection.get_by_path("/_search?q=_id:http\:\/\/localhost\/noticia\/2\/fast", self.stop)
        response = self._verify_status_code_and_return_response()
        self.assertEqual(response["hits"]["total"], 1)
        self.assertEqual(response["hits"]["hits"][0]["_id"], u'http://localhost/noticia/2/fast')

    def test_search_for_specific_type_with_query(self):
        self.es_connection.search(callback=self.stop,
                                  source={"query": {"text": {"ID": "171171"}}},
                                  type="materia", index="teste")

        response = self._verify_status_code_and_return_response()
        self.assertEqual(response["hits"]["total"], 1)
        self.assertEqual(response["hits"]["hits"][0]["_id"], u'171171')

    def test_search_all_entries(self):
        self.es_connection.search(self.stop)
        response = self._verify_status_code_and_return_response()
        self.assertEqual(response["hits"]["total"], 28)

    def test_search_specific_index(self):
        self.es_connection.search(callback=self.stop, index="outroteste")
        response = self._verify_status_code_and_return_response()
        self.assertEqual(response["hits"]["total"], 14)

    def test_search_apecific_type(self):
        self.es_connection.search(self.stop, type='galeria')
        response = self._verify_status_code_and_return_response()
        self.assertEqual(response["hits"]["total"], 2)

    def test_should_access_specific_documento(self):
        self.es_connection.get(index="teste", type="materia", uid="171171", callback=self.stop)
        response = self.wait()
        self.assertEqual(response['Portal'], "G1")
        self.assertEqual(response['Macrotema'], "Noticias")

    def test_should_accumulate_searches_before_search(self):
        source = {"query": {"text": {"_id": "171171"}}}
        self.es_connection.multi_search("teste", source=source)
        source = {"query": {"text": {"body": "multisearch"}}}
        self.es_connection.multi_search("neverEndIndex", source=source)

        self.assertListEqual(['{"index": "teste"}\n{"query": {"text": {"_id": "171171"}}}',
                              '{"index": "neverEndIndex"}\n{"query": {"text": {"body": "multisearch"}}}'
                              ], self.es_connection.bulk.bulk_list)

    def test_should_generate_empty_header_with_no_index_specified(self):
        source = {"query": {"text": {"_id": "171171"}}}
        self.es_connection.multi_search(index=None, source=source)
        source = {"query": {"text": {"body": "multisearch"}}}
        self.es_connection.multi_search(index=None, source=source)

        self.assertListEqual(['{}\n{"query": {"text": {"_id": "171171"}}}',
                              '{}\n{"query": {"text": {"body": "multisearch"}}}'
                              ], self.es_connection.bulk.bulk_list)

    def test_should_make_two_searches(self):
        self._make_multisearch()
        response = self._verify_status_code_and_return_response()
        self.assertEqual(response['responses'][0]['hits']['hits'][0]['_id'], "171171")
        self.assertFalse("hits" in response['responses'][1])

    def test_should_clean_search_list_after_search(self):
        self._make_multisearch()
        self.wait()
        self.assertListEqual([], self.es_connection.bulk.bulk_list)

    def _make_multisearch(self):
        source = {"query": {"text": {"_id": "171171"}}}
        self.es_connection.multi_search(index="teste", source=source)
        source = {"query": {"text": {"_id": "101010"}}}
        self.es_connection.multi_search(index="neverEndIndex", source=source)

        self.es_connection.apply_search(callback=self.stop)

    def _verify_status_code_and_return_response(self):
        response = self.wait()
        return self._verify_response_and_returns_dict(response)

    def _verify_response_and_returns_dict(self, response):
        self.assertTrue(response.code in [200, 201], "Wrong response code: %d." % response.code)
        response = escape.json_decode(response.body)
        return response

    def test_can_put_and_delete_document(self):
        try:
            doc_id = str(uuid4())

            self.es_connection.put("test", "document", doc_id, {
                "test": "document",
                "other": "property"
            }, parameters={'refresh': True}, callback=self.stop)

            response = self.wait()
            response_dict = self._verify_response_and_returns_dict(response)
            self.assertEqual(response_dict['_index'], 'test')
            self.assertEqual(response_dict['_type'], 'document')
            self.assertTrue(response_dict['ok'])
            self.assertEqual(response_dict['_id'], doc_id)
            self.assertIn('refresh=True', response.request.url)
        finally:
            self.es_connection.delete("test", "document", doc_id,
                parameters={'refresh': True}, callback=self.stop)
            response = self._verify_status_code_and_return_response()

            self.assertTrue(response['found'])
            self.assertTrue(response['ok'])
            self.assertEqual(response['_index'], 'test')
            self.assertEqual(response['_type'], 'document')
            self.assertEqual(response['_id'], doc_id)

    def test_count_all_entries(self):
        self.es_connection.count(callback=self.stop)
        response = self._verify_status_code_and_return_response()
        self.assertEqual(response["count"], 28)

    def test_count_specific_index(self):
        self.es_connection.count(callback=self.stop, index="outroteste")
        response = self._verify_status_code_and_return_response()
        self.assertEqual(response["count"], 14)

    def test_count_specific_type(self):
        self.es_connection.count(callback=self.stop, type='galeria')
        response = self._verify_status_code_and_return_response()
        self.assertEqual(response["count"], 2)

    def test_count_specific_query(self):
        source = {"query": {"text": {"_id": "171171"}}}
        self.es_connection.count(callback=self.stop, source=source)
        response = self._verify_status_code_and_return_response()
        self.assertEqual(response["count"], 1)

    def test_count_specific_query_with_parameters(self):
        source = {"query": {"text": {"_id": "171171"}}}
        parameters = {'refresh': True}
        self.es_connection.count(callback=self.stop, source=source, parameters=parameters)
        response = self.wait()
        response_dict = self._verify_response_and_returns_dict(response)
        self.assertEqual(response_dict["count"], 1)
        self.assertTrue(response.request.url.endswith('_count?refresh=True'))

    def test_count_specific_query_with_many_parameters(self):
        source = {"query": {"text": {"_id": "171171"}}}
        parameters = {'df': '_id', 'test': True}
        self.es_connection.count(callback=self.stop, source=source, parameters=parameters)
        response = self.wait()
        response_dict = self._verify_response_and_returns_dict(response)
        self.assertEqual(response_dict["count"], 1)
        self.assertTrue(response.request.url.endswith('_count?df=_id&test=True'))