def test_search_can_be_limited_to_fields(data_client):
    """Restricting a search to one stored field returns only that field."""
    search = (
        Search(using=data_client)
        .index('git')
        .doc_type('repos')
        .fields('organization')
    )
    result = search.execute()
    assert result.hits.total == 1
    assert result.hits[0] == {'organization': ['elasticsearch']}
def get_list_of_indexes_to_reindex(full_reindex=False):
    """Collect questionnaires whose search index is out of sync.

    Walks every database; for each non-registration questionnaire whose
    mapping is out of sync (or unconditionally when ``full_reindex`` is True)
    records questionnaire info plus its current submission count.

    :param full_reindex: when True, include every questionnaire regardless
        of its mapping state.
    :returns: ``(list_of_questionnaire_info_dicts, total_submissions)``, or
        ``None`` on unexpected error (historical behaviour — but the error
        is now logged instead of silently dropped).
    """
    db_names = all_db_names()
    try:
        # One client for the whole scan; the old code rebuilt it for every
        # out-of-sync questionnaire.
        es = Elasticsearch(hosts=[{"host": ELASTIC_SEARCH_HOST,
                                   "port": ELASTIC_SEARCH_PORT}])
        list_of_indexes_out_of_sync = []
        total_submissions = 0
        for database_name in db_names:
            dbm = get_db_manager(database_name)
            questionnaires = dbm.load_all_rows_in_view('questionnaire')
            if not questionnaires:
                continue
            for row in questionnaires:
                if row['value']['is_registration_model']:
                    continue  # registration models are never reindexed here
                form_model_doc = FormModelDocument.wrap(row["value"])
                if full_reindex or is_mapping_out_of_sync(form_model_doc, dbm):
                    search = Search(using=es, index=dbm.database_name,
                                    doc_type=form_model_doc.id)
                    no_of_submissions = search.count()
                    questionnaire_info = dict(
                        db_name=database_name,
                        questionnaire_id=form_model_doc.id,
                        name=form_model_doc.name,
                        no_of_submissions=no_of_submissions)
                    total_submissions += no_of_submissions
                    list_of_indexes_out_of_sync.append(questionnaire_info)
        return list_of_indexes_out_of_sync, total_submissions
    except Exception:
        # The original swallowed every error with `pass`; keep the implicit
        # None return for callers, but record what went wrong.
        import logging
        logging.getLogger(__name__).exception(
            "failed while collecting indexes to reindex")
def get(self, request):
    """Suggest up to ten reporters matching the ``term`` query parameter."""
    database_name = get_database_name(request.user)
    search_text = lower(request.GET["term"] or "")
    es = Elasticsearch(hosts=[{"host": ELASTIC_SEARCH_HOST,
                               "port": ELASTIC_SEARCH_PORT}])
    search = Search(using=es, index=database_name, doc_type="reporter")
    search = search.extra(**{"size": "10"})
    matches = []
    if search_text:
        # Escape characters that have special meaning in query_string syntax.
        escaped = ElasticUtilsHelper().replace_special_chars(search_text)
        fields = [
            "name", "name_value", "name_exact",
            "short_code", "short_code_exact", "short_code_value",
        ]
        search = search.query("query_string", query=escaped, fields=fields)
        results = search.execute()
        matches = [{"id": hit.short_code, "label": self.get_label(hit)}
                   for hit in results.hits]
    return HttpResponse(json.dumps(matches))
def get(self, request):
    """
    API to fetch a store.
    @params:
        store_id: integer
    @returns:
        Retrieve store information
    """
    result = {}
    serializer = FetchStoreSerializer(data=request.query_params)
    if serializer.is_valid():
        # Convert only after validation: the old code called int() on the raw
        # parameter first, so a missing or malformed store_id raised a
        # TypeError/ValueError before the serializer could return a clean 400.
        store_id = int(request.query_params.get('store_id'))
        search = Search(using=self.es, index="store_data_index")
        search = search.filter("match", store_id=store_id)
        data = search.execute().to_dict()
        if data['hits']['hits']:
            result['status'] = True
            result['data'] = data['hits']['hits'][0]['_source']
        else:
            result['status'] = False
            result['message'] = 'No data found for the given store id'
        status_code = status.HTTP_200_OK
    else:
        status_code = status.HTTP_400_BAD_REQUEST
        result = {'status': False, 'message': serializer.errors}
    return Response(result, status=status_code)
def query_imagesi(classnum_list, classval_list, upfilename):
    """Display the four images most similar to the requested classes.

    Builds an AND query over (at most) the first seven entries of
    ``classnum_list`` against the 'adknum' index, scores every hit by how
    close its stored class values are to ``classval_list``, and shows the
    four best-scoring images.

    :param classnum_list: class-number ids to match.
    :param classval_list: values aligned with ``classnum_list``; assumed at
        least as long as classnum_list — TODO confirm with callers.
    :param upfilename: forwarded to ``display_image_set``.
    """
    query = Q('match_all')
    search = Search(index='adknum')
    # Only the first seven class numbers contribute to the query (the old
    # counter-and-break loop did the same; dead locals removed).
    for class_num in list(classnum_list)[:7]:
        query = query & Q('bool', must=[Q("match", classnum=class_num)])
    search = search.query(query).using(client)
    # NOTE: scan() issues its own request, so the former eager execute()
    # call was a redundant round trip and has been dropped.
    similarity_by_image = {}
    for hit in search.scan():
        print("123. hit.classnum: ", hit.classnum)
        print("124. hit.classval: ", hit.classval)
        # Default score for the degenerate empty-request case.
        similarity_by_image[hit.imgfile] = 1.0
        total_similarity = 0.0
        for ii, requested in enumerate(classnum_list):
            for jj, stored in enumerate(hit.classnum):
                if requested == int(stored):
                    # Closer stored value -> larger contribution; the +10
                    # damps the denominator and avoids division by zero.
                    total_similarity += classval_list[ii] / (
                        abs(classval_list[ii] - int(hit.classval[jj])) + 10)
                    break
            similarity_by_image[hit.imgfile] = total_similarity
    # Keep only the four highest-scoring images.
    top_images = sorted(similarity_by_image,
                        key=similarity_by_image.get, reverse=True)[:4]
    for img in top_images:
        print("140. ", img, similarity_by_image[img])
    return display_image_set(set(top_images), upfilename, '')
def query_images(object_list):
    """Search the 'idxo20' index for images matching every name in
    ``object_list`` and build display divs for up to four of them.

    NOTE(review): ``images_div`` is built but never returned or passed on —
    presumably the commented-out Dash layout hookup below was meant to
    consume it. Confirm intended wiring before relying on this function.
    """
    #print("In query_images")
    hit1 = set()  # NOTE(review): never used
    image_set = set()
    print("11. object_list =", object_list)
    QI = Q('match_all')
    #s1 = Search(index='bvgobjs_index')
    s1 = Search(index='idxo20')
    for objectk in object_list:
        print("objectk= ", objectk)
        # AND together one match clause per requested object name.
        QI = QI & Q("match", names=objectk)
    s1 = s1.query(QI).using(client)
    response = s1.execute()  # NOTE(review): result unused; scan() re-queries
    for hit in s1.scan():
        print("33 ", hit.imgfile)
        image_set.add(hit.imgfile)
    print("image_set = {0}".format(image_set))
    im = 0
    #app.layout = serve_layout
    images_div = []
    for image in image_set:
        if im > 3:
            break  # only the first four images (set order is arbitrary)
        file, ext = os.path.splitext(image)
        image = file + '.png'  # display the .png variant of the stored file
        print("66 image =", image)
        images_div.append(display_image(image))
        im = im + 1
    print("Please hit refresh...")
    # Here call callback -
    #serve_layout = app.layout = serve_layout(images_div)
def post(self):
    """Validate timestamp/token/signature, then run a title suggest query.

    Returns an ES response dict on success; every failure returns a
    ``{'success': 0, 'message': ...}`` payload with HTTP 200.
    """
    ts = self.args['_']
    # Reject requests whose timestamp is more than 30 minutes off.
    if abs(int(time() * 1000) - int(ts)) > 1800000:
        return {'success': 0, 'message': '时间戳无效'}, 200
    token = self.args['token']
    appkey = self.args['appkey']
    verify_token = flask_redis.get(appkey)
    if verify_token is None:
        return {'success': 0, 'message': 'token 无效'}, 200
    # redis may hand back bytes; normalise before comparing.
    if isinstance(verify_token, bytes):
        verify_token = verify_token.decode('utf-8')
    if verify_token != token:
        return {'success': 0, 'message': 'token 无效'}, 200
    sign = self.args['sign']
    if hash_sha256("{0},{1},{2}".format(ts, token, appkey)) != sign:
        return {'success': 0, 'message': 'sign 无效'}, 200
    keyword = self.args['keyword']
    query = Website.query.join(Token, Website.id == Token.website_id).filter(
        Token.appkey == appkey).first()
    domain = query.domain
    try:
        s = Search(using=client, index='suggest', doc_type='news')
        s = s.filter('term', website=domain).query('match', title=keyword)
        s = s[0:10]
        response = s.execute()
        return {'success': 1, 'data': response.to_dict()}, 200
    except Exception as e:
        # str(e): the raw exception object is not JSON-serialisable, so the
        # old ``'message': e`` broke the error response itself.
        return {'success': 0, 'message': str(e)}, 200
def test_search_type_count(data_client):
    """A count-type search returns aggregations but no hits."""
    search = Search(using=data_client, index='git')
    search.aggs.bucket('per_type', 'terms', field='_type')
    search = search.params(search_type='count')
    response = search.execute()
    assert response.hits == []
    assert len(response.aggregations.per_type.buckets) == 2
def __init__(self, config='cdr', size=2000):
    """
    :param config: str| Connection alias of the elasticsearch instance
        to fetch from the connection registry
    :param size: int| Size limit to set on elasticsearch query
    """
    self.conn = connections.get_connection(config)
    # NOTE(review): 'cdr' is Search's first positional argument (the
    # connection alias), not an index name — confirm this is intended.
    self.elastic = Search('cdr', extra={'size': size})
def get(self, request):
    """Return completion-suggest titles for the ``s`` query parameter."""
    term = request.GET.get('s', '')
    suggestion = Search(using=client, index='hfut_search',
                        doc_type='hfut_type')
    suggestion = suggestion.suggest('my_suggest', term, completion={
        "field": "suggest",
        "size": common.FETCH_NUMBER_DEF['pageNum'],
    })
    executed = suggestion.execute()
    options = executed.suggest['my_suggest'][0].options
    titles = [option._source["title"] for option in options]
    return HttpResponse(json.dumps(titles), content_type="application/json")
def search(cls, **kwargs):
    """Build a Search bound to this doc type, with score-ordered
    highlighting of the excerpt fields enabled."""
    params = dict(kwargs)
    # These always win over caller-supplied values (same as the original
    # kwargs.update call).
    params['using'] = connections.get_connection()
    params['index'] = cls.get_index()
    params['doc_type'] = {cls._doc_type.name: cls.from_es}
    sq = Search(**params)
    return sq.highlight(*cls.excerpt_fields).highlight_options(order='score')
async def test_msearch(data_client):
    """Two identical searches bundled into one msearch both succeed."""
    async_client = AsyncElasticsearch(hosts=['localhost'])
    search = Search(using=async_client).index('git')
    multi = MultiSearch(using=async_client).index('git').add(search).add(search)
    first, second = await multi.execute()
    assert first.success()
    assert second.success()
def test_render_contributions_handles_unicode():
    """Titles containing non-ASCII characters render without mangling."""
    source = {
        'control_number': 1427573,
        'titles': [
            {'title': u'Storage Ring Based EDM Search — Achievements and Goals'},
        ],
    }
    raw = {
        'hits': {
            'hits': [{'_type': 'hep', '_source': source}],
            'total': 1,
        },
    }
    hits = Response(Search(), raw).hits
    row = [
        u"<a href='/literature/1427573'>Storage Ring Based EDM Search — Achievements and Goals</a>",
        u'\n\n',
        '',
        0,
    ]
    assert render_contributions(hits) == ([row], 1)
def test_render_people():
    """A people hit renders as a link to the author record."""
    raw = {
        'hits': {
            'hits': [
                {
                    '_type': 'authors',
                    '_source': {
                        'control_number': 1,
                        'name': {'preferred_name': 'preferred_name'},
                    },
                },
            ],
            'total': 1,
        },
    }
    hits = Response(Search(), raw).hits
    expected = ([["<a href='/authors/1'>preferred_name</a>"]], 1)
    assert render_people(hits) == expected
def test_render_contributions():
    """Two literature hits render into rows of [title link, separator,
    journal title (or ''), citation count (or 0)], plus the total."""
    hits = Response(
        Search(), {
            'hits': {
                'hits': [
                    {
                        '_type': 'hep',
                        '_source': {
                            'citation_count': 1,
                            'control_number': 1,
                            'publication_info': [
                                {'journal_title': 'first-journal_title'},
                            ],
                            'titles': [
                                {'title': 'first-title'},
                            ],
                        },
                    },
                    {
                        # No journal or citation count: those columns are
                        # expected to fall back to '' and 0 below.
                        '_type': 'hep',
                        '_source': {
                            'control_number': 2,
                            'titles': [
                                {'title': 'second-title'},
                            ],
                        },
                    },
                ],
                'total': 2,
            },
        }).hits
    expected = ([
        [
            "<a href='/literature/1'>first-title</a>",
            u'\n\n',
            'first-journal_title',
            1,
        ],
        [
            "<a href='/literature/2'>second-title</a>",
            u'\n\n',
            '',
            0,
        ],
    ], 2)
    result = render_contributions(hits)
    assert expected == result
def query_imageso(object_list):
    """Show images from 'idx0' that match every name in ``object_list``."""
    print("In query_imageso")
    hit1 = set()
    matches = set()
    print("11. object_list =", object_list)
    combined = Q('match_all')
    searcher = Search(index='idx0')
    # AND together one match clause per requested name.
    for name in object_list:
        print("name= ", name)
        combined &= Q("match", names=name)
    searcher = searcher.query(combined).using(client)
    response = searcher.execute()
    matches.update(hit.imgfile for hit in searcher.scan())
    return display_image_set(matches, None, object_list)
def test_execute(self, mock_save, mock_execute, mock_now):
    """Test the execute class method."""
    search = Search()
    sq = SearchQuery.execute(search)
    self.assertIsNone(sq.user)
    self.assertEqual(sq.index, '_all')
    self.assertEqual(sq.query, search.to_dict())
    self.assertEqual(sq.hits, [])
    self.assertEqual(sq.total_hits, mock_execute.return_value.hits.total)
    self.assertEqual(sq.reference, '')
    self.assertGreater(sq.duration, 0)
    self.assertEqual(sq.executed_at, mock_now.return_value)
    mock_save.assert_called_once_with()
    # Passing save=False must skip persistence entirely.
    mock_save.reset_mock()
    sq = SearchQuery.execute(search, save=False)
    mock_save.assert_not_called()
def search(cls, **kwargs):
    """Return a Search over this document's index; caller kwargs override
    the defaults."""
    options = dict(
        using=connections.get_connection(),
        index=cls.get_index(),
        doc_type={cls._doc_type.name: cls.from_es},
    )
    options.update(kwargs)
    return Search(**options)
def search(cls, **kwargs):
    """Build a Search bound to this document type; later ``kwargs`` entries
    override the defaults below."""
    options = {
        "using": connections.get_connection(),
        "index": cls.get_index(),
        "doc_type": {cls._doc_type.name: cls.from_es},
        **kwargs,
    }
    return Search(**options)
def create_search(
    must: list = None,
    should: list = None,
    filter_: list = None,
    must_not: list = None,
    source: dict = None,
    sort=None,
) -> Search:
    """
    Search index by construct query.

    Kwargs:
        must: list of the must satisfied query
        should: list of the should satisfied query
        sort: sort statement
    Return:
        Search object.
    """
    search = Search(index=INDEX)
    match_all = Q("match_all")
    # Normalise every clause list; must always carries the match_all clause.
    bool_clauses = {
        "must": (must or []) + [match_all],
        "should": should or [],
        "filter": filter_ or [],
        "must_not": must_not or [],
    }
    search = search.query("bool", **bool_clauses)
    if sort:
        search = search.sort(sort)
    if source:
        search = search.source(**source)
    print(f"Query: {json.dumps(search.to_dict())}")
    return search
def test_execute_count(self, mock_count):
    """execute_count persists a COUNT-type SearchQuery with the mocked total."""
    mock_count.return_value = 100
    search = Search()
    sq = execute_count(search, search_terms="foo", user=None, reference="bar")
    # Round-trip through the database to confirm it saves in / out.
    sq.refresh_from_db()
    self.assertIsNotNone(sq.id)
    expectations = {
        "search_terms": "foo",
        "reference": "bar",
        "query": search.to_dict(),
        "index": "_all",
        "hits": [],
        "total_hits": 100,
        "total_hits_relation": SearchQuery.TotalHitsRelation.ACCURATE,
        "query_type": SearchQuery.QueryType.COUNT,
        "aggregations": {},
    }
    for attr, expected in expectations.items():
        self.assertEqual(getattr(sq, attr), expected)
    self.assertTrue(sq.duration > 0)
def query_imagesi(classnum_list, upfilename):
    """Display images from 'vgnum' matching up to seven class numbers.

    ANDs one match clause per class number (only the first seven are used),
    streams every matching document and hands the image files to
    ``display_image_set``.

    :param classnum_list: class-number ids to match.
    :param upfilename: forwarded to ``display_image_set``.
    """
    image_set = set()
    query = Q('match_all')
    search = Search(index='vgnum')
    # Cap the query at the first seven class numbers (the old
    # counter-and-break loop did the same; dead locals removed).
    for class_num in list(classnum_list)[:7]:
        print("class_num= ", class_num)
        query = query & Q('bool', must=[Q("match", classnum=class_num)])
    search = search.query(query).using(client)
    # scan() issues its own scroll request, so the former eager execute()
    # call was a redundant round trip and has been dropped.
    for hit in search.scan():
        image_set.add(hit.imgfile)
    return display_image_set(image_set, upfilename, '')
def async_fetch_questionnaire_details(questionnaire_ids, db_name, full_reindex):
    """Gather name and submission count for questionnaires needing reindex.

    :param questionnaire_ids: form model ids to inspect.
    :param db_name: database the questionnaires live in.
    :param full_reindex: when truthy, include every questionnaire regardless
        of mapping state.
    :returns: list of info dicts, or None when ``questionnaire_ids`` is empty.
    """
    logger = logging.getLogger('datawinners.tasks')
    logger.debug(questionnaire_ids)
    # Lazy %-formatting also fixes the TypeError the old
    # ``db_name + ': full reindex:' + full_reindex`` concatenation raised
    # whenever full_reindex was a bool rather than a str.
    logger.debug('%s: full reindex: %s', db_name, full_reindex)
    if not questionnaire_ids:
        return None
    dbm = get_db_manager(db_name)
    # One client for all questionnaires; the old code built one per loop hit.
    es = Elasticsearch(hosts=[{"host": ELASTIC_SEARCH_HOST,
                               "port": ELASTIC_SEARCH_PORT}])
    questionnaire_details = []
    for form_model_id in questionnaire_ids:
        form_model = FormModel.get(dbm, form_model_id)
        if full_reindex or check_mapping_out_of_sync(form_model, dbm):
            search = Search(using=es, index=dbm.database_name,
                            doc_type=form_model_id)
            questionnaire_details.append(dict(
                db_name=db_name,
                questionnaire_id=form_model_id,
                name=form_model.name,
                no_of_submissions=search.count()))
    return questionnaire_details
def get_imgfile(img_id):
    """Return the image file stored for ``img_id`` in the 'idx1' index.

    :returns: the ``imagefile`` of the last matching document (preserving
        the original last-match semantics), or None when nothing matches —
        the old code raised UnboundLocalError in that case.
    """
    s = Search(index='idx1').query('match', image_id=img_id).using(client)
    # scan() performs the query itself; the previous eager execute() call
    # was a wasted round trip and has been removed.
    imgfile = None
    for hit in s.scan():
        imgfile = hit.imagefile  # keep the last match, as before
    return imgfile
def get_imgfile(img_id):
    """Return the image file stored for ``img_id`` in the 'idxi20' index.

    :returns: the ``imagefile`` of the last matching document (preserving
        the original last-match semantics), or None when nothing matches —
        the old code raised UnboundLocalError in that case.
    """
    s = Search(index='idxi20').query('match', image_id=img_id).using(client)
    # scan() performs the query itself; the previous eager execute() call
    # was a wasted round trip and has been removed.
    imgfile = None
    for hit in s.scan():
        imgfile = hit.imagefile  # keep the last match, as before
    return imgfile
def test_count_prefetch(data_client, mocker):
    """count() reuses a cached exact total and only hits the count API when
    the cached relation is no longer exact."""
    mocker.spy(data_client, "count")
    search = Search(using=data_client).index("git")
    search.execute()
    # Exact cached total: no count API call needed.
    assert search.count() == 53
    assert data_client.count.call_count == 0
    # A "gte" relation forces a real count request.
    search._response.hits.total.relation = "gte"
    assert search.count() == 53
    assert data_client.count.call_count == 1
def test_render_conferences(request_context):
    """Conference hits render as [link, address, '', spacer] rows; the hit
    without titles is apparently filtered out, so only one row and a count
    of 1 come back despite two hits — confirm against render_conferences."""
    hits = Response(
        Search(), {
            'hits': {
                'hits': [
                    {
                        '_type': 'conferences',
                        '_source': {
                            'addresses': [
                                {'original_address': 'original_address'},
                            ],
                            'control_number': 1,
                            'titles': [
                                {'title': 'title'},
                            ],
                        },
                    },
                    {
                        # No titles on this record.
                        '_type': 'conferences',
                        '_source': {
                            'control_number': 2,
                        },
                    },
                ],
                'total': 2,
            },
        }).hits
    expected = ([
        [
            '<a href="/conferences/1">title</a>',
            'original_address',
            '',
            u' ',
        ],
    ], 1)
    result = render_conferences(2, hits)
    assert expected == result
def test_render_conferences_handles_unicode(request_context):
    """Conference titles with accented characters render intact."""
    source = {
        'addresses': [
            {'original_address': 'Paris, France'},
        ],
        'control_number': 1351301,
        'titles': [
            {'title': u'Théorie de Cordes en France'},
        ],
    }
    raw = {
        'hits': {
            'hits': [{'_type': 'conference', '_source': source}],
            'total': 1,
        },
    }
    hits = Response(Search(), raw).hits
    expected = ([
        [
            u'<a href="/conferences/1351301">Théorie de Cordes en France</a>',
            'Paris, France',
            '',
            u' ',
        ],
    ], 1)
    assert render_conferences(1, hits) == expected
def test_count_type(data_client):
    """Exactly one document of type 'repos' exists in the git index."""
    search = Search(using=data_client).index('git').doc_type('repos')
    assert search.count() == 1
class Messenger:
    """
    Performs transformations on data eg. f(x) -> y
    Decoupled from the other factor network code,
    and can be swapped with other implementations
    """

    def __init__(self, config='cdr', size=2000):
        """
        :param config: str| Connection alias of the elasticsearch instance
            to fetch from the connection registry
        :param size: int| Size limit to set on elasticsearch query
        """
        self.conn = connections.get_connection(config)
        # NOTE(review): 'cdr' is Search's first positional argument (the
        # connection alias), not an index name — confirm this is intended.
        self.elastic = Search('cdr', extra={'size': size})

    def match(self, match_type, **kwargs):
        """Run a single-clause query of ``match_type`` and return the
        executed response."""
        return self.elastic.query(match_type, **kwargs).execute()

    @memoize
    def available(self, ad_id):
        """
        Gets the available factors for a particular ad

        :param ad_id: str Unique ad identifier
        :return: factors
        :rtype : list
        """
        # Union of the _source key sets across all matching documents.
        accumulator = lambda x,y: x|y
        output = self.match('match_phrase', _id=ad_id)
        keys = [
            set(i['_source'].keys())
            for i in output.hits.hits
        ]
        return list(reduce(accumulator, keys, set()))

    def lookup(self, ad_id, field):
        """
        Get data from ad_id

        :param ad_id: str (or list of str) ids to be queried
        :return: set of the ``field`` values found on the matching documents
        """
        if not isinstance(ad_id, list):
            ad_id = [ad_id]
        results = self.elastic.query(Ids(values=ad_id)).execute()
        return set(flatten([
            hits['_source'][field]
            for hits in results.hits.hits
            if field in hits['_source']
        ]))

    def reverse_lookup(self, field, field_value):
        """
        Get ad_id from a specific field and search term

        :param field_value: str String to be queried
        """
        results = self.match(
            'match_phrase', **{field:field_value}).hits.hits
        if not results:
            # Fall back to a loose full-text match across all fields.
            results = self.match('match', _all=field_value).hits.hits
        return [hit['_id'] for hit in results]

    def suggest(self, ad_id, field):
        """
        The suggest function suggests other ad_ids that share
        this field with the input ad_id.
        """
        suggestions = {}
        field_values = self.lookup(ad_id, field)
        for value in field_values:
            ads = set(self.reverse_lookup(field, value))
            # To prevent cycles
            if isinstance(ad_id, list):
                ads -= set(ad_id)
            else:
                ads.discard(ad_id)
            suggestions[value] = list(ads)
        return suggestions
def test_count_all(data_client):
    """The git index holds 53 documents in total."""
    assert Search(using=data_client).index('git').count() == 53
def test_count_filter(data_client):
    """Only two docs lack parent_shas: the initial commit and the repo doc."""
    no_parents = ~Q('exists', field='parent_shas')
    search = Search(using=data_client).index('git').filter(no_parents)
    assert search.count() == 2