Ejemplo n.º 1
0
def test_search_can_be_limited_to_fields(data_client):
    """Restricting a search to one field returns only that field in each hit."""
    repo_search = Search(using=data_client)
    repo_search = repo_search.index('git')
    repo_search = repo_search.doc_type('repos')
    repo_search = repo_search.fields('organization')

    result = repo_search.execute()

    # Exactly one repo document is expected, reduced to the requested field.
    assert result.hits.total == 1
    assert result.hits[0] == {'organization': ['elasticsearch']}
Ejemplo n.º 2
0
def get_list_of_indexes_to_reindex(full_reindex=False):
    """Collect the questionnaires whose search index should be rebuilt.

    Every database returned by ``all_db_names()`` is inspected. Rows flagged
    ``is_registration_model`` are skipped; every other questionnaire is
    reported when ``full_reindex`` is true or when
    ``is_mapping_out_of_sync`` says its mapping is stale.

    :param full_reindex: when true, report every questionnaire regardless of
        the state of its mapping.
    :return: ``(questionnaire_infos, total_submissions)`` where each info is
        a dict with ``db_name``, ``questionnaire_id``, ``name`` and
        ``no_of_submissions``. ``None`` is returned (as before) when anything
        fails, but the failure is now logged instead of silently dropped.
    """
    import logging

    db_names = all_db_names()
    try:
        # The connection settings never change, so build the client once
        # instead of once per questionnaire.
        es = Elasticsearch(hosts=[{
            "host": ELASTIC_SEARCH_HOST,
            "port": ELASTIC_SEARCH_PORT
        }])
        list_of_indexes_out_of_sync = []
        total_submissions = 0
        for database_name in db_names:
            dbm = get_db_manager(database_name)
            questionnaires = dbm.load_all_rows_in_view('questionnaire')
            if not questionnaires:
                continue
            for row in questionnaires:
                if row['value']['is_registration_model']:
                    continue

                form_model_doc = FormModelDocument.wrap(row["value"])
                if not (full_reindex
                        or is_mapping_out_of_sync(form_model_doc, dbm)):
                    continue

                search = Search(using=es,
                                index=dbm.database_name,
                                doc_type=form_model_doc.id)
                no_of_submissions = search.count()
                list_of_indexes_out_of_sync.append(dict(
                    db_name=database_name,
                    questionnaire_id=form_model_doc.id,
                    name=form_model_doc.name,
                    no_of_submissions=no_of_submissions))
                total_submissions += no_of_submissions
        return list_of_indexes_out_of_sync, total_submissions
    except Exception:
        # Best-effort behaviour is kept (callers still get None), but the
        # cause is recorded so failures can be diagnosed.
        logging.getLogger(__name__).exception(
            "Failed to collect the list of indexes to reindex")
        return None
Ejemplo n.º 3
0
 def get(self, request):
     """Return up to 10 reporters matching the ``term`` query parameter.

     The response body is a JSON list of ``{"id": short_code, "label": ...}``
     objects. A missing or empty ``term`` yields an empty list; previously a
     missing ``term`` raised a key error before the ``or ""`` fallback could
     apply.
     """
     database_name = get_database_name(request.user)
     search_text = lower(request.GET.get("term") or "")
     if not search_text:
         # Nothing to search for: skip building the client and the query.
         return HttpResponse(json.dumps([]))

     es = Elasticsearch(hosts=[{
         "host": ELASTIC_SEARCH_HOST,
         "port": ELASTIC_SEARCH_PORT
     }])
     search = Search(using=es, index=database_name, doc_type="reporter")
     search = search.extra(**{"size": "10"})

     query_text_escaped = ElasticUtilsHelper().replace_special_chars(
         search_text)
     query_fields = [
         "name", "name_value", "name_exact", "short_code",
         "short_code_exact", "short_code_value"
     ]
     search = search.query("query_string",
                           query=query_text_escaped,
                           fields=query_fields)
     search_results = search.execute()
     resp = [{
         "id": result.short_code,
         "label": self.get_label(result)
     } for result in search_results.hits]
     return HttpResponse(json.dumps(resp))
Ejemplo n.º 4
0
    def get(self, request):
        """
            API to fetch store

            @params: store_id: integer
            @returns: Retrieve store information

            Responds with HTTP 400 when the serializer rejects the query
            parameters or when ``store_id`` is missing or not an integer
            (previously the latter raised before validation ran). Otherwise
            responds with HTTP 200 and either the first matching document's
            ``_source`` or a "not found" message.
        """
        serializer = FetchStoreSerializer(data=request.query_params)
        if not serializer.is_valid():
            return Response({'status': False, 'message': serializer.errors},
                            status=status.HTTP_400_BAD_REQUEST)

        # Convert only after validation so bad input yields a 400 response
        # instead of an unhandled TypeError/ValueError.
        try:
            store_id = int(request.query_params.get('store_id'))
        except (TypeError, ValueError):
            return Response(
                {'status': False, 'message': 'store_id must be an integer'},
                status=status.HTTP_400_BAD_REQUEST)

        search = Search(using=self.es, index="store_data_index")
        search = search.filter("match", store_id=store_id)
        data = search.execute().to_dict()

        result = {}
        if data['hits']['hits']:
            result['status'] = True
            result['data'] = data['hits']['hits'][0]['_source']
        else:
            result['status'] = False
            result['message'] = 'No data found for the give store id'
        return Response(result, status=status.HTTP_200_OK)
Ejemplo n.º 5
0
def query_imagesi(classnum_list, classval_list, upfilename):
    """Find the 4 images most similar to the given class numbers/values.

    The ``adknum`` index is queried for documents matching the first 7 class
    numbers. Each hit is scored by summing, over every class number it shares
    with ``classnum_list``, ``value / (abs(value - hit_value) + 10)`` where
    ``value`` is the entry of ``classval_list`` at the same position.

    :param classnum_list: class numbers to look for.
    :param classval_list: values aligned position-by-position with
        ``classnum_list``.
    :param upfilename: forwarded unchanged to ``display_image_set``.
    :return: whatever ``display_image_set`` returns for the top 4 images.
    """
    # Only the first 7 class numbers constrain the query.
    QI = Q('match_all')
    for position, class_num in enumerate(classnum_list):
        if position >= 7:  # can make this 5--
            break
        QI = QI & Q('bool', must=[Q("match", classnum=class_num)])

    # scan() issues its own request, so no separate execute() is needed.
    s1 = Search(index='adknum').query(QI).using(client)

    simDict = {}
    for hit in s1.scan():
        print("123. hit.classnum: ", hit.classnum)
        print("124. hit.classval: ", hit.classval)
        # Score stays at 1.0 only when classnum_list is empty.
        simDict[hit.imgfile] = 1.0
        similarityImg = 0.0
        for ii, classi in enumerate(classnum_list):
            for jj, classj in enumerate(hit.classnum):
                if classi == int(classj):
                    # A plain accumulator replaces the fixed 15-slot buffer,
                    # which overflowed for more than 15 classes.
                    similarityImg = similarityImg + classval_list[ii] / (
                        abs(classval_list[ii] - int(hit.classval[jj])) + 10)
                    break
            simDict[hit.imgfile] = similarityImg

    # Keep the 4 best-scoring images.
    image_set = set()
    ranked = sorted(simDict, key=simDict.get, reverse=True)
    for img in ranked[:4]:
        print("140. ", img, simDict[img])
        image_set.add(img)

    return display_image_set(image_set, upfilename, '')
Ejemplo n.º 6
0
def query_images(object_list):
    """Show up to 4 images whose ``names`` match every entry of ``object_list``.

    The ``idxo20`` index is scanned for matching documents, each image file
    name has its extension replaced by ``.png``, and the rendered images are
    installed as the new ``app.layout``.

    :param object_list: object names that must all match.
    """
    print("11. object_list =", object_list)

    QI = Q('match_all')
    for objectk in object_list:
        print("objectk= ", objectk)
        QI = QI & Q("match", names=objectk)

    # scan() issues its own request, so no separate execute() is needed.
    s1 = Search(index='idxo20').query(QI).using(client)
    image_set = set()
    for hit in s1.scan():
        print("33 ", hit.imgfile)
        image_set.add(hit.imgfile)

    print("image_set = {0}".format(image_set))
    images_div = []
    for im, image in enumerate(image_set):
        if im > 3:  # only the first 4 images are displayed
            break
        stem, _ext = os.path.splitext(image)
        image = stem + '.png'
        print("66 image =", image)
        images_div.append(display_image(image))
    print("Please hit refresh...")
    app.layout = serve_layout(images_div)
Ejemplo n.º 7
0
	def post(self):
		"""Return up to 10 title suggestions for ``keyword`` on the caller's site.

		The request is rejected (``success: 0``) when the timestamp is more
		than 30 minutes off or not numeric, when the token stored under
		``appkey`` is missing or different, when the signature does not match
		``hash_sha256("ts,token,appkey")``, or when no website is registered
		for the appkey. All responses use HTTP status 200.
		"""
		ts = self.args['_']
		try:
			clock_skew_ms = abs(int(time() * 1000) - int(ts))
		except (TypeError, ValueError):
			# A missing or non-numeric timestamp is simply invalid.
			return {'success': 0, 'message': '时间戳无效'}, 200
		if clock_skew_ms > 1800000:
			return {'success': 0, 'message': '时间戳无效'}, 200

		token = self.args['token']
		appkey = self.args['appkey']
		verify_token = flask_redis.get(appkey)
		if verify_token is None:
			return {'success': 0, 'message': 'token 无效'}, 200
		if isinstance(verify_token, bytes):
			verify_token = verify_token.decode('utf-8')
		if verify_token != token:
			return {'success': 0, 'message': 'token 无效'}, 200

		sign = self.args['sign']
		if hash_sha256("{0},{1},{2}".format(ts, token, appkey)) != sign:
			return {'success': 0, 'message': 'sign 无效'}, 200

		keyword = self.args['keyword']
		website = Website.query.join(Token, Website.id==Token.website_id).filter(Token.appkey == appkey).first()
		if website is None:
			# .first() returns None when nothing matches; avoid AttributeError.
			return {'success': 0, 'message': 'appkey 无效'}, 200
		domain = website.domain
		try:
			s = Search(using=client, index='suggest', doc_type='news')
			s = s.filter('term', website=domain).query('match', title=keyword)
			s = s[0:10]
			response = s.execute()
			return {'success': 1, 'data': response.to_dict()}, 200
		except Exception as e:
			# The exception object itself cannot be serialized in the payload.
			return {'success': 0, 'message': str(e)}, 200
Ejemplo n.º 8
0
def test_search_type_count(data_client):
    """A ``count``-type search returns no hits, only the aggregation buckets."""
    git_search = Search(using=data_client, index='git')
    # The return value is deliberately not reassigned here.
    git_search.aggs.bucket('per_type', 'terms', field='_type')
    count_only = git_search.params(search_type='count')

    outcome = count_only.execute()

    assert [] == outcome.hits
    type_buckets = outcome.aggregations.per_type.buckets
    assert 2 == len(type_buckets)
Ejemplo n.º 9
0
def test_search_type_count(data_client):
    """A ``count``-type search returns no hits, only the aggregation buckets."""
    git_search = Search(using=data_client, index='git')
    # The return value is deliberately not reassigned here.
    git_search.aggs.bucket('per_type', 'terms', field='_type')
    count_only = git_search.params(search_type='count')

    outcome = count_only.execute()

    assert [] == outcome.hits
    type_buckets = outcome.aggregations.per_type.buckets
    assert 2 == len(type_buckets)
Ejemplo n.º 10
0
 def __init__(self, config='cdr', size=2000):
     """
     :param config: str
         Connection alias passed to ``connections.get_connection`` and
         used as the connection of the search object
     :param size: int
         Size limit to set on elasticsearch query
     """
     self.conn = connections.get_connection(config)
     # Use the requested alias rather than a hard-coded 'cdr', so a
     # non-default ``config`` is honoured by the search as well.
     self.elastic = Search(using=config, extra={'size': size})
Ejemplo n.º 11
0
 def get(self, request):
     """Answer with a JSON list of suggested titles for the ``s`` parameter."""
     prefix = request.GET.get('s', '')

     suggester = Search(using=client,
                        index='hfut_search',
                        doc_type='hfut_type')
     completion_spec = {
         "field": "suggest",
         "size": common.FETCH_NUMBER_DEF['pageNum'],
     }
     suggester = suggester.suggest('my_suggest',
                                   prefix,
                                   completion=completion_spec)
     suggestion_response = suggester.execute()

     # Collect the title of every option offered by the first suggest entry.
     titles = []
     for option in suggestion_response.suggest['my_suggest'][0].options:
         titles.append(option._source["title"])

     payload = json.dumps(titles)
     return HttpResponse(payload, content_type="application/json")
Ejemplo n.º 12
0
    def search(cls, **kwargs):
        """Build a highlighted ``Search`` for this class.

        ``using``, ``index`` and ``doc_type`` are always set from the class
        and override any values supplied by the caller.
        """
        search_options = dict(
            kwargs,
            using=connections.get_connection(),
            index=cls.get_index(),
            doc_type={cls._doc_type.name: cls.from_es},
        )

        # Highlight the excerpt fields, ordering fragments by score.
        highlighted = Search(**search_options).highlight(*cls.excerpt_fields)
        return highlighted.highlight_options(order='score')
Ejemplo n.º 13
0
async def test_msearch(data_client):
    """Two identical searches sent through one ``MultiSearch`` both succeed."""
    es_async = AsyncElasticsearch(hosts=['localhost'])
    single = Search(using=es_async).index('git')

    batch = MultiSearch(using=es_async).index('git')
    batch = batch.add(single)
    batch = batch.add(single)

    first, second = await batch.execute()
    outcomes = [first.success(), second.success()]
    assert all(outcomes)
Ejemplo n.º 14
0
def test_render_contributions_handles_unicode():
    """A title containing a non-ASCII dash is rendered unchanged."""
    record = {
        '_type': 'hep',
        '_source': {
            'control_number': 1427573,
            'titles': [
                {'title': u'Storage Ring Based EDM Search — Achievements and Goals'},
            ],
        },
    }
    raw_response = {'hits': {'hits': [record], 'total': 1}}
    hits = Response(Search(), raw_response).hits

    expected_row = [
        u"<a href='/literature/1427573'>Storage Ring Based EDM Search — Achievements and Goals</a>",
        u'\n\n',
        '',
        0,
    ]
    expected = ([expected_row], 1)

    result = render_contributions(hits)

    assert expected == result
Ejemplo n.º 15
0
def test_render_people():
    """A single author hit is rendered as a link to its author page."""
    author_record = {
        '_type': 'authors',
        '_source': {
            'control_number': 1,
            'name': {'preferred_name': 'preferred_name'},
        },
    }
    raw_response = {'hits': {'hits': [author_record], 'total': 1}}
    hits = Response(Search(), raw_response).hits

    expected_rows = [
        ["<a href='/authors/1'>preferred_name</a>"],
    ]
    expected = (expected_rows, 1)

    result = render_people(hits)

    assert expected == result
Ejemplo n.º 16
0
def test_render_contributions():
    """Two literature hits render as rows; absent fields fall back to '' and 0."""
    complete_record = {
        '_type': 'hep',
        '_source': {
            'citation_count': 1,
            'control_number': 1,
            'publication_info': [
                {'journal_title': 'first-journal_title'},
            ],
            'titles': [
                {'title': 'first-title'},
            ],
        },
    }
    # This record has no citation count and no publication info.
    sparse_record = {
        '_type': 'hep',
        '_source': {
            'control_number': 2,
            'titles': [
                {'title': 'second-title'},
            ],
        },
    }
    raw_response = {
        'hits': {
            'hits': [complete_record, sparse_record],
            'total': 2,
        },
    }
    hits = Response(Search(), raw_response).hits

    first_row = [
        "<a href='/literature/1'>first-title</a>",
        u'\n\n',
        'first-journal_title',
        1,
    ]
    second_row = [
        "<a href='/literature/2'>second-title</a>",
        u'\n\n',
        '',
        0,
    ]
    expected = ([first_row, second_row], 2)

    result = render_contributions(hits)

    assert expected == result
Ejemplo n.º 17
0
def query_imageso(object_list):
    """Display every image whose ``names`` match all entries of ``object_list``.

    The ``idx0`` index is scanned for documents matching every name, and the
    collected image file names are handed to ``display_image_set``.

    :param object_list: object names that must all match.
    :return: whatever ``display_image_set`` returns.
    """
    print("In query_imageso")
    print("11. object_list =", object_list)

    QI = Q('match_all')
    for name in object_list:
        print("name= ", name)
        QI = QI & Q("match", names=name)

    # scan() issues its own request, so no separate execute() is needed.
    s1 = Search(index='idx0').query(QI).using(client)
    image_set = {hit.imgfile for hit in s1.scan()}

    return display_image_set(image_set, None, object_list)
Ejemplo n.º 18
0
    def test_execute(self, mock_save, mock_execute, mock_now):
        """``SearchQuery.execute`` records the search and honours ``save``."""
        empty_search = Search()
        logged = SearchQuery.execute(empty_search)

        # Fields derived from the search and its (mocked) response.
        self.assertEqual(logged.user, None)
        self.assertEqual(logged.index, '_all')
        self.assertEqual(logged.query, empty_search.to_dict())
        self.assertEqual(logged.hits, [])
        expected_total = mock_execute.return_value.hits.total
        self.assertEqual(logged.total_hits, expected_total)
        self.assertEqual(logged.reference, '')
        self.assertTrue(logged.duration > 0)
        self.assertEqual(logged.executed_at, mock_now.return_value)
        mock_save.assert_called_once_with()

        # With save=False nothing must be persisted.
        mock_save.reset_mock()
        logged = SearchQuery.execute(empty_search, save=False)
        mock_save.assert_not_called()
Ejemplo n.º 19
0
    def search(cls, **kwargs):
        """Build a ``Search`` for this class; ``kwargs`` override the defaults."""
        defaults = dict(
            using=connections.get_connection(),
            index=cls.get_index(),
            doc_type={cls._doc_type.name: cls.from_es},
        )
        # Caller-supplied options take precedence over the class defaults.
        return Search(**dict(defaults, **kwargs))
Ejemplo n.º 20
0
    def search(cls, **kwargs):
        """Build a ``Search`` for this class; ``kwargs`` override the defaults."""
        defaults = dict(
            using=connections.get_connection(),
            index=cls.get_index(),
            doc_type={cls._doc_type.name: cls.from_es},
        )
        # Caller-supplied options take precedence over the class defaults.
        return Search(**dict(defaults, **kwargs))
Ejemplo n.º 21
0
def create_search(
    must: list = None,
    should: list = None,
    filter_: list = None,
    must_not: list = None,
    source: dict = None,
    sort=None,
) -> Search:
    """
    Build a bool-query search over ``INDEX``.

    A ``match_all`` clause is always appended to the ``must`` clauses. The
    resulting query is printed as JSON before the search is returned.

    Kwargs:
        must: list of the must satisfied query
        should: list of the should satisfied query
        filter_: list of filter clauses
        must_not: list of clauses that must not match
        source: keyword arguments forwarded to ``Search.source``
        sort: sort statement

    Return:
        Search object.
    """
    catch_all = Q("match_all")

    # Missing clause lists default to empty; ``must`` always has match_all.
    bool_clauses = {
        "must": (must + [catch_all]) if must else [catch_all],
        "should": should or [],
        "filter": filter_ or [],
        "must_not": must_not or [],
    }
    search = Search(index=INDEX).query("bool", **bool_clauses)

    if sort:
        search = search.sort(sort)
    if source:
        search = search.source(**source)

    query_json = json.dumps(search.to_dict())
    print(f"Query: {query_json}")

    return search
 def test_execute_count(self, mock_count):
     """``execute_count`` stores a COUNT-type query carrying the mocked total."""
     mock_count.return_value = 100
     empty_search = Search()
     logged = execute_count(empty_search,
                            search_terms="foo",
                            user=None,
                            reference="bar")
     # Round-trip through the database to confirm the record was saved.
     logged.refresh_from_db()
     self.assertIsNotNone(logged.id)

     # Values passed in by the caller.
     self.assertEqual(logged.search_terms, "foo")
     self.assertEqual(logged.reference, "bar")

     # Values derived from the search and the count result.
     self.assertEqual(logged.query, empty_search.to_dict())
     self.assertEqual(logged.index, "_all")
     self.assertEqual(logged.hits, [])
     self.assertEqual(logged.total_hits, 100)
     accurate = SearchQuery.TotalHitsRelation.ACCURATE
     self.assertEqual(logged.total_hits_relation, accurate)
     self.assertEqual(logged.query_type, SearchQuery.QueryType.COUNT)
     self.assertEqual(logged.aggregations, {})
     self.assertTrue(logged.duration > 0)
Ejemplo n.º 23
0
def query_imagesi(classnum_list, upfilename):  #Disabled --
    """Display every image matching the first 7 class numbers.

    The ``vgnum`` index is scanned for documents whose ``classnum`` matches
    each of the first 7 entries of ``classnum_list``.

    :param classnum_list: class numbers to match; entries after the 7th are
        ignored.
    :param upfilename: forwarded unchanged to ``display_image_set``.
    :return: whatever ``display_image_set`` returns.
    """
    QI = Q('match_all')
    for position, class_num in enumerate(classnum_list):
        if position >= 7:  # can make this 7--
            break
        print("class_num= ", class_num)
        QI = QI & Q('bool', must=[Q("match", classnum=class_num)])

    # scan() issues its own request, so no separate execute() is needed.
    s1 = Search(index='vgnum').query(QI).using(client)
    image_set = {hit.imgfile for hit in s1.scan()}
    return display_image_set(image_set, upfilename, '')
Ejemplo n.º 24
0
def async_fetch_questionnaire_details(questionnaire_ids, db_name, full_reindex):
    """Report submission counts for questionnaires that need reindexing.

    :param questionnaire_ids: ids of the form models to inspect.
    :param db_name: name of the database holding the form models.
    :param full_reindex: when truthy, every questionnaire is reported;
        otherwise only those for which ``check_mapping_out_of_sync`` is true.
    :return: ``None`` when ``questionnaire_ids`` is empty, else a list of
        dicts with ``db_name``, ``questionnaire_id``, ``name`` and
        ``no_of_submissions``.
    """
    logger = logging.getLogger('datawinners.tasks')
    logger.debug(questionnaire_ids)
    # Lazy %-formatting: concatenating a boolean ``full_reindex`` onto a
    # string raised TypeError before any work was done.
    logger.debug('%s: full reindex:%s', db_name, full_reindex)
    if not questionnaire_ids:
        return None

    dbm = get_db_manager(db_name)
    # The connection settings never change, so build the client once.
    es = Elasticsearch(hosts=[{"host": ELASTIC_SEARCH_HOST, "port": ELASTIC_SEARCH_PORT}])
    questionnaire_details = []
    for form_model_id in questionnaire_ids:
        form_model = FormModel.get(dbm, form_model_id)
        if not (full_reindex or check_mapping_out_of_sync(form_model, dbm)):
            continue
        search = Search(using=es, index=dbm.database_name, doc_type=form_model_id)
        questionnaire_details.append(dict(
            db_name=db_name,
            questionnaire_id=form_model_id,
            name=form_model.name,
            no_of_submissions=search.count()))
    return questionnaire_details
Ejemplo n.º 25
0
def get_imgfile(img_id):
    """Return the ``imagefile`` of the first ``idx1`` document for ``img_id``.

    :param img_id: value matched against the ``image_id`` field.
    :return: the first hit's ``imagefile``, or ``None`` when nothing matches.
    """
    s = Search(index='idx1').query('match', image_id=img_id)
    s = s.using(client)
    # scan() issues its own request, so no separate execute() is needed.
    for hit in s.scan():
        return hit.imagefile
    return None
Ejemplo n.º 26
0
def get_imgfile(img_id):
    """Return the ``imagefile`` of the first ``idxi20`` document for ``img_id``.

    :param img_id: value matched against the ``image_id`` field.
    :return: the first hit's ``imagefile``, or ``None`` when nothing matches.
    """
    s = Search(index='idxi20').query('match', image_id=img_id)
    s = s.using(client)
    # scan() issues its own request, so no separate execute() is needed.
    for hit in s.scan():
        return hit.imagefile
    return None
Ejemplo n.º 27
0
def test_count_prefetch(data_client, mocker):
    """``count()`` reuses an executed response unless its total is a lower bound."""
    mocker.spy(data_client, "count")

    git_search = Search(using=data_client).index("git")
    git_search.execute()

    # After execute(), count() is expected not to call the client.
    total_after_execute = git_search.count()
    assert total_after_execute == 53
    assert data_client.count.call_count == 0

    # Marking the total as "gte" is expected to trigger a real count request.
    git_search._response.hits.total.relation = "gte"
    total_after_relation_change = git_search.count()
    assert total_after_relation_change == 53
    assert data_client.count.call_count == 1
Ejemplo n.º 28
0
def test_render_conferences(request_context):
    """Of two conference hits, only the one with a title and address is rendered."""
    complete_record = {
        '_type': 'conferences',
        '_source': {
            'addresses': [
                {'original_address': 'original_address'},
            ],
            'control_number': 1,
            'titles': [
                {'title': 'title'},
            ],
        },
    }
    # This record carries nothing but its control number.
    bare_record = {
        '_type': 'conferences',
        '_source': {
            'control_number': 2,
        },
    }
    raw_response = {
        'hits': {
            'hits': [complete_record, bare_record],
            'total': 2,
        },
    }
    hits = Response(Search(), raw_response).hits

    expected_row = [
        '<a href="/conferences/1">title</a>',
        'original_address',
        '',
        u'  ',
    ]
    expected = ([expected_row], 1)

    result = render_conferences(2, hits)

    assert expected == result
Ejemplo n.º 29
0
def test_render_conferences_handles_unicode(request_context):
    """A conference title with accented characters is rendered unchanged."""
    record = {
        '_type': 'conference',
        '_source': {
            'addresses': [
                {'original_address': 'Paris, France'},
            ],
            'control_number': 1351301,
            'titles': [
                {'title': u'Théorie de Cordes en France'},
            ],
        },
    }
    raw_response = {'hits': {'hits': [record], 'total': 1}}
    hits = Response(Search(), raw_response).hits

    expected_row = [
        u'<a href="/conferences/1351301">Théorie de Cordes en France</a>',
        'Paris, France',
        '',
        u'  ',
    ]
    expected = ([expected_row], 1)

    result = render_conferences(1, hits)

    assert expected == result
Ejemplo n.º 30
0
def test_count_type(data_client):
    """Counting ``repos`` documents in the ``git`` index yields exactly one."""
    repo_search = Search(using=data_client)
    repo_search = repo_search.index('git')
    repo_search = repo_search.doc_type('repos')

    assert 1 == repo_search.count()
Ejemplo n.º 31
0
class Messenger:
    """
    Performs transformations on data

        eg. f(x) -> y

    Decoupled from the other factor network code,
    and can be swapped with other implementations
    """

    def __init__(self, config='cdr', size=2000):
        """
        :param config: str
            Connection alias passed to ``connections.get_connection`` and
            used as the connection of the search object
        :param size: int
            Size limit to set on elasticsearch query
        """
        self.conn = connections.get_connection(config)
        # Use the requested alias rather than a hard-coded 'cdr', so a
        # non-default ``config`` is honoured by the search as well.
        self.elastic = Search(using=config, extra={'size': size})

    def match(self, match_type, **kwargs):
        """
        Run a query of type ``match_type`` built from ``kwargs``

        :return: the executed search response
        """
        return self.elastic.query(match_type, **kwargs).execute()

    @memoize
    def available(self, ad_id):
        """
        Gets the available factors for a particular ad

        :param ad_id: str
            Unique ad identifier

        :return: factors
        :rtype : list
        """
        output = self.match('match_phrase', _id=ad_id)
        keys = [
            set(hit['_source'].keys())
            for hit in output.hits.hits
        ]
        # Union of the field names seen across all matching documents.
        return list(reduce(lambda x, y: x | y, keys, set()))

    def lookup(self, ad_id, field):
        """
        Get data from ad_id

        :param ad_id: str or list
            Id, or list of ids, to be queried
        :param field: str
            Source field whose values are collected

        :return: flattened set of the field's values across the hits
        :rtype : set
        """
        if not isinstance(ad_id, list):
            ad_id = [ad_id]

        results = self.elastic.query(Ids(values=ad_id)).execute()

        # Hits that do not carry the field are skipped.
        return set(flatten([
            hit['_source'][field] for hit in results.hits.hits
            if field in hit['_source']
        ]))

    def reverse_lookup(self, field, field_value):
        """
        Get ad_id from a specific field and search term

        :param field: str
            Field to match the phrase against
        :param field_value: str
            String to be queried

        :return: ids of the matching documents
        :rtype : list
        """
        results = self.match(
            'match_phrase', **{field: field_value}).hits.hits

        # Fall back to a match on ``_all`` when the phrase query finds nothing.
        if not results:
            results = self.match('match', _all=field_value).hits.hits

        return [hit['_id'] for hit in results]

    def suggest(self, ad_id, field):
        """
        The suggest function suggests other ad_ids that share this
        field with the input ad_id.

        :return: mapping of each field value to the other ad ids sharing it
        :rtype : dict
        """
        suggestions = {}
        field_values = self.lookup(ad_id, field)

        for value in field_values:
            ads = set(self.reverse_lookup(field, value))

            # To prevent cycles
            if isinstance(ad_id, list):
                ads -= set(ad_id)
            else:
                ads.discard(ad_id)
            suggestions[value] = list(ads)

        return suggestions
Ejemplo n.º 32
0
def test_count_all(data_client):
    """Counting every document in the ``git`` index yields 53."""
    git_search = Search(using=data_client)
    git_search = git_search.index('git')

    assert 53 == git_search.count()
Ejemplo n.º 33
0
def test_count_filter(data_client):
    """Only two ``git`` documents lack a ``parent_shas`` field."""
    without_parents = ~Q('exists', field='parent_shas')
    filtered = Search(using=data_client).index('git').filter(without_parents)

    # initial commit + repo document
    assert 2 == filtered.count()