Code Example #1
File: test_search.py  Project: daker/mozillians
    def test_profilepic_search(self):
        """Make sure searching for only users with profile pics works."""
        with open(os.path.join(os.path.dirname(__file__), 'profile-photo.jpg')) as f:
            r = self.mozillian_client.post(reverse('profile.edit'),
                dict(first_name='Aman', last_name='Withapic', photo=f))

        if not settings.ES_DISABLED:
            get_es().refresh(settings.ES_INDEXES['default'], timesleep=0)

        amanhasapic = 'Aman Withapic'
        amanda = 'Amanda Younger'
        url = reverse('search')
        r = self.mozillian_client.get(url, dict(q='Am'))
        rpp = self.mozillian_client.get(url, dict(q='Am', picture_only=1))

        eq_(r.status_code, 200)
        peeps = r.context['people']
        peeps_pp = rpp.context['people']
        saw_amanda = False

        # Make sure Amanda (who has no profile picture) shows up in the
        # unfiltered results and really has no photo.
        for person in peeps:
            if person.display_name == amanda:
                if bool(person.photo):
                    self.fail('Amanda should not have a profile pic')
                saw_amanda = True

        # Make sure amanda shows up in peeps
        assert amanda in [p.display_name for p in peeps]
        # Make sure she doesn't show up in peeps_pp
        assert amanda not in [p.display_name for p in peeps_pp]
        self.assertEqual(peeps_pp[0].display_name, amanhasapic)
        self.assertTrue(saw_amanda, "Amanda didn't show up in the search results")
Code Example #2
File: estestcase.py  Project: chiehwen/elasticutils
    def setUpClass(cls):
        super(ElasticSearchTestCase, cls).setUpClass()
        if not getattr(settings, 'ES_URLS', None):
            cls._skip_tests = True
            return

        try:
            get_es().health()
        except (Timeout, ConnectionError):
            cls._skip_tests = True
            return

        # Save settings and override them
        cls._old_es_disabled = settings.ES_DISABLED
        settings.ES_DISABLED = False

        cls._old_es_indexes = settings.ES_INDEXES
        settings.ES_INDEXES = testify(settings.ES_INDEXES)

        cls.es = get_es()
        for index in settings.ES_INDEXES.values():
            try:
                cls.es.delete_index(index)
            except ElasticHttpNotFoundError:
                pass
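A setup like this normally has a matching class-level teardown that removes the test indexes and restores the settings it overrode. A minimal sketch of that counterpart, reusing the attributes and pyelasticsearch-style `delete_index()` call shown above (the exact implementation in elasticutils may differ):

    @classmethod
    def tearDownClass(cls):
        if not getattr(cls, '_skip_tests', False):
            # Drop the indexes that the tests created.
            for index in settings.ES_INDEXES.values():
                try:
                    cls.es.delete_index(index)
                except ElasticHttpNotFoundError:
                    pass

            # Restore the settings overridden in setUpClass.
            settings.ES_DISABLED = cls._old_es_disabled
            settings.ES_INDEXES = cls._old_es_indexes

        super(ElasticSearchTestCase, cls).tearDownClass()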
Code Example #3
File: tests.py  Project: ccarruitero/mozillians
    def setUpClass(cls):
        """Runs the :class:`TestCase` setup to add some data.

        Also flushes and refreshes the data so it's searchable via Elasticsearch.
        """
        elasticutils.tests.ESTestCase.setUpClass()
        TestCase.setUpClass()
        get_es().flush(refresh=True)
Code Example #4
File: __init__.py  Project: readevalprint/kitsune
    def refresh(self):
        # Any time we're doing a refresh, we're making sure that the
        # index is ready to be queried.  Given that, it's almost
        # always the case that we want to run all the generated tasks,
        # then refresh.
        from search.models import generate_tasks
        generate_tasks()

        get_es().refresh(settings.ES_INDEXES['default'], timesleep=0)
Code Example #5
File: __init__.py  Project: wraithan/elasticutils
    def refresh(self, timesleep=0):
        """Refresh index after indexing.

        This refreshes the index specified by `self.index_name`.

        :arg timesleep: int; number of seconds to sleep after telling
            ES to refresh

        """
        get_es().refresh(self.index_name, timesleep=timesleep)
Code Example #6
File: test_es.py  Project: klrmn/kitsune
    def refresh(self, timesleep=0):
        index = es_utils.WRITE_INDEX

        # Any time we're doing a refresh, we're making sure that the
        # index is ready to be queried.  Given that, it's almost
        # always the case that we want to run all the generated tasks,
        # then refresh.
        generate_tasks()

        get_es().refresh(index, timesleep=timesleep)
Code Example #7
    def setup_class(cls):
        """Class setup for tests.

        Checks to see if ES is running and if not, sets ``skip_test``
        to True on the class.
        """
        # Note: TestCase has no setup_class
        try:
            get_es().collect_info()
        except pyes.urllib3.MaxRetryError:
            cls.skip_tests = True
Code Example #8
File: __init__.py  Project: mythmon/elasticutils
    def setup_class(cls):
        """Class setup for tests.

        Checks to see if ES is running and if not, sets ``skip_test``
        to True on the class.
        """
        # Note: TestCase has no setup_class
        try:
            get_es().health()
        except pyelasticsearch.exceptions.ConnectionError:
            cls.skip_tests = True
Code Example #9
File: models.py  Project: aparo/elasticutils
    def index(cls, document, id=None, bulk=False, force_insert=False):
        """Associates a document with a correlated id in ES.

        Wrapper around pyes.ES.index.

        Example::

            MyModel.index(instance.fields, id=instance.id)
        """
        elasticutils.get_es().index(
            document, index=cls._get_index(), doc_type=cls._meta.db_table,
            id=id, bulk=bulk, force_insert=force_insert)
Code Example #10
File: test_es.py  Project: HonzaKral/elasticutils
    def test_get_es_force_new(self):
        """Test that force_new works correctly."""
        es = get_es()

        es2 = get_es(force_new=True)

        # force_new prevents the new ElasticSearch instance from getting
        # cached, so we should only have one item in the cache.
        eq_(len(_cached_elasticsearch), 1)

        # However, the two ElasticSearch instances should be different.
        assert id(es) != id(es2)
Code Example #11
File: models.py  Project: erikrose/kitsune
    def unindex(cls, id):
        """Removes a document from the index"""
        if not settings.ES_LIVE_INDEXING:
            return

        index = cls.get_es_index()
        doc_type = cls._meta.db_table
        try:
            elasticutils.get_es().delete(index, doc_type, id)
        except pyes.exceptions.NotFoundException:
            # Ignore the case where we try to delete something that's
            # not there.
            pass
Code Example #12
File: tasks.py  Project: clouserw/FlightDeck
def index_all(pks, **kw):
    ids_str = ','.join(map(str, pks))
    log.debug('ES starting bulk action for packages: [%s]' % ids_str)
    
    for package in Package.objects.filter(pk__in=pks):
        package.refresh_index(bulk=True)
    
    try:
        get_es().flush_bulk(forced=True)
    except KeyboardInterrupt:
        raise
    except Exception, e:
        log.error('ES failed bulk action (%s), package ids: [%s]'
                  % (e, ids_str))
Code Example #13
File: test_es.py  Project: HonzaKral/elasticutils
    def test_get_es_settings_cache(self):
        """Tests **settings and cache."""
        es = get_es(max_retries=5, revival_delay=10)
        eq_(len(_cached_elasticsearch), 1)

        # Switching the order doesn't affect caching.
        es2 = get_es(revival_delay=10, max_retries=5)
        eq_(len(_cached_elasticsearch), 1)
        assert id(es) == id(es2)

        # Different values brings up a new item.
        es3 = get_es(max_retries=4, revival_delay=10)
        eq_(len(_cached_elasticsearch), 2)
        assert id(es) != id(es3)
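The order-insensitive caching this test relies on falls out naturally if the cache key is built from the sorted keyword arguments. A rough sketch of that idea (an illustration of the observed behaviour, not necessarily how elasticutils builds its key internally):

_cached_elasticsearch = {}

def _build_cache_key(urls, **settings):
    # Sorting the kwargs makes (max_retries=5, revival_delay=10) and
    # (revival_delay=10, max_retries=5) produce the same key, while any
    # differing value produces a new cache entry.
    return (tuple(urls), tuple(sorted(settings.items())))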
Code Example #14
File: views.py  Project: beenishkhan/zamboni
def elastic(request):
    INDEX = site_settings.ES_INDEXES['default']
    es = elasticutils.get_es()
    mappings = {'addons': addons.cron.reindex_addons,
                'apps': addons.cron.reindex_apps,
                'collections': bandwagon.cron.reindex_collections,
                'compat': compatibility_report,
                'users': users.cron.reindex_users,
               }
    if request.method == 'POST':
        if request.POST.get('recreate'):
            es.delete_index_if_exists(INDEX)
            # We must set up the mappings before we create the index again.
            addons.search.setup_mapping()
            stats.search.setup_indexes()
            es.create_index_if_missing(INDEX)
            messages.info(request, 'Deleting %s index.' % INDEX)
        if request.POST.get('reindex') in mappings:
            name = request.POST['reindex']
            # Reindex.
            if mappings.get(name):
                mappings[name]()
            messages.info(request, 'Reindexing %s.' % name)
        return redirect('zadmin.elastic')

    indexes = set(site_settings.ES_INDEXES.values())
    mappings = es.get_mapping(None, indexes)
    ctx = {
        'index': INDEX,
        'nodes': es.cluster_nodes(),
        'health': es.cluster_health(),
        'state': es.cluster_state(),
        'mappings': [(index, mappings.get(index, {})) for index in indexes],
    }
    return jingo.render(request, 'zadmin/elastic.html', ctx)
Code Example #15
File: indexes.py  Project: andymckay/monolith
 def handle(self, *args, **options):
     conn = get_es()
     if options.get('delete'):
         conn.delete_index('monolith')
     conn.create_index('monolith')
     mapping = {
         'name': {
             'store': 'yes',
             'type': 'string',
         },
         'date': {
             'store': 'yes',
             'type': 'date',
             'format': 'yyyy-MM-dd',
         },
         'key': {
             'store': 'yes',
             'type': 'string',
         },
         'value': {
             'store': 'yes',
             'type': 'integer',
         }
     }
     conn.put_mapping('metrics',
                      {'properties': mapping},
                      ['monolith'])
Code Example #16
File: search.py  Project: ominds/zamboni
def setup_mkt_indexes():
    """
    Define explicit ES mappings for models. If a field is not explicitly
    defined and a field is inserted, ES will dynamically guess the type and
    insert it, in a schemaless manner.
    """
    es = elasticutils.get_es()
    for model in [Contribution, InappPayment]:
        index = model._get_index()
        try:
            es.create_index_if_missing(index)
        except pyes.ElasticSearchException:
            pass

        mapping = {
            'properties': {
                'id': {'type': 'long'},
                'date': {'format': 'dateOptionalTime',
                         'type': 'date'},
                'count': {'type': 'long'},
                'revenue': {'type': 'double'},

                # Tell ES not to 'analyze' these fields so that querying with
                # hyphens and lowercase letters matches the stored values.
                'currency': {'type': 'string',
                             'index': 'not_analyzed'},
                'source': {'type': 'string',
                           'index': 'not_analyzed'},
                'inapp': {'type': 'string',
                          'index': 'not_analyzed'}
            }
        }

        es.put_mapping(model._meta.db_table, mapping,
                       model._get_index())
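Because `currency`, `source` and `inapp` are stored `not_analyzed`, each value stays a single exact term, so a term query against the stored value works without the analyzer lowercasing it or splitting it on hyphens. A generic query body illustrating the point (not taken from the project):

query = {
    'query': {
        # Matches the exact stored term; an analyzed field would have
        # lowercased 'USD' at index time and this query would miss.
        'term': {'currency': 'USD'},
    },
}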
Code Example #17
File: tasks.py  Project: bebef1987/zamboni
def index_collections(ids, **kw):
    es = elasticutils.get_es()
    log.debug('Indexing collections %s-%s [%s].' % (ids[0], ids[-1], len(ids)))
    qs = Collection.uncached.filter(id__in=ids).transform(attach_translations)
    for c in qs:
        Collection.index(search.extract(c), bulk=True, id=c.id)
    es.flush_bulk(forced=True)
Code Example #18
File: tests.py  Project: aparo/elasticutils
 def test_get_es_defaults(self):
     es = get_es()
     eq_(es.timeout, settings.ES_TIMEOUT)
     # dump_curl defaults to False, but if dump_curl is Falsey,
     # then pyes.es.ES sets its dump_curl attribute to None.
     eq_(es.dump_curl, None)
     eq_(es.default_indexes, [settings.ES_INDEXES["default"]])
Code Example #19
File: es_search.py  Project: jasonthomas/kitsune
def setup_mapping(index):
    from forums.models import Thread

    mapping = {
        'properties': {
            'id': {TYPE: INTEGER},
            'thread_id': {TYPE: INTEGER},
            'forum_id': {TYPE: INTEGER},
            'title': {TYPE: STRING, INDEX: ANALYZED, ANALYZER: SNOWBALL},
            'is_sticky': {TYPE: BOOLEAN},
            'is_locked': {TYPE: BOOLEAN},
            'author_id': {TYPE: INTEGER},
            'author_ord': {TYPE: STRING},
            'content': {TYPE: STRING, INDEX: ANALYZED, ANALYZER: SNOWBALL,
                        STORE: YES, TERM_VECTOR: WITH_POS_OFFSETS},
            'created': {TYPE: DATE},
            'updated': {TYPE: DATE},
            'replies': {TYPE: INTEGER}
            }
        }

    es = elasticutils.get_es()

    try:
        es.put_mapping(Thread._meta.db_table, mapping, index)
    except pyes.exceptions.ElasticSearchException, e:
        log.error(e)
Code Example #20
File: es_utils.py  Project: erikrose/kitsune
def es_reindex_with_progress(doctypes=None, percent=100):
    """Rebuild Elastic indexes as you iterate over yielded progress ratios.

    :arg doctypes: Defaults to None which will index all doctypes.
        Otherwise indexes the doctypes specified. See
        :py:func:`.get_doctype_stats()` for what doctypes look like.
    :arg percent: Defaults to 100.  Allows you to specify how much of
        each doctype you want to index.  This is useful for
        development where doing a full reindex takes an hour.

    """
    from search.models import get_search_models

    es = elasticutils.get_es()

    search_models = get_search_models()
    if doctypes:
        search_models = [cls for cls in search_models
                         if cls._meta.db_table in doctypes]

    if len(search_models) == len(get_search_models()):
        index = settings.ES_INDEXES.get('default')
        if index is not None:
            # If we're indexing everything and there's a default index
            # specified in settings, then we delete and recreate it.
            es.delete_index_if_exists(index)
            es.create_index(index)

    total = sum([cls.objects.count() for cls in search_models])

    to_index = [cls.index_all(percent) for cls in search_models]

    return (float(done) / total for done, _ in
            izip(count(1), chain(*to_index)))
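Because the function returns a generator of progress ratios, a caller drives the reindex simply by iterating over it. A minimal usage sketch (the doctype name here is made up for illustration):

for ratio in es_reindex_with_progress(doctypes=['forums_thread'], percent=10):
    print 'Reindexing... %d%% done' % (ratio * 100)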
Code Example #21
File: models.py  Project: namitsingal/zamboni
 def unindex(cls, id):
     es = elasticutils.get_es()
     try:
         es.delete(cls._get_index(), cls._meta.db_table, id)
     except pyes.exceptions.NotFoundException:
         # Item wasn't found, whatevs.
         pass
Code Example #22
File: tasks.py  Project: gedex/zamboni
def index_finance_total_by_currency(addons, **kw):
    """
    Bug 757581
    Total finance stats, currency breakdown.
    """
    es = elasticutils.get_es()
    log.info('Indexing total financial stats by currency for %s apps.' %
              len(addons))

    for addon in addons:
        # Get all contributions for given add-on.
        qs = Contribution.objects.filter(addon=addon, uuid=None)
        if not qs.exists():
            continue

        # Get list of distinct currencies.
        currencies = set(qs.values_list('currency', flat=True))

        for currency in currencies:
            try:
                key = ord_word('cur' + str(addon) + currency.lower())
                data = search.get_finance_total_by_currency(
                    qs, addon, currency)
                if not already_indexed(Contribution, data):
                    Contribution.index(data, bulk=True, id=key)
                es.flush_bulk(forced=True)
            except Exception, exc:
                index_finance_total_by_currency.retry(args=[addons], exc=exc)
                raise
Code Example #23
File: emitters.py  Project: arnm/accipiokey
    def on_current_word_signal(self, word_signal):

        if not word_signal.strip():
            self._possible_completion = ''
            self.possible_completion_signal.emit(self._possible_completion)
            return

        suggestion_name = 'completion_suggestion'
        compl_resp = get_es().suggest(index=WordMappingType.get_index(),
            body={
                suggestion_name: {
                    'text': word_signal,
                    'completion': {
                        'field': 'text'
                    }
                }
            })
        suggestions = compl_resp[suggestion_name][0]['options']

        if not suggestions:
            self._possible_completion = ''
            self.possible_completion_signal.emit(self._possible_completion)
            return

        for suggestion in suggestions:
            if suggestion['text'] != word_signal:
                top_suggestion = suggestion['text']
                self._possible_completion = top_suggestion
                break

        self.possible_completion_signal.emit(self._possible_completion)
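The suggest call above only works if the `text` field of `WordMappingType` is mapped as a completion field, since that is what Elasticsearch builds the completion suggester from. A minimal sketch of such a mapping (the project's actual mapping is an assumption here):

word_mapping = {
    'properties': {
        'text': {
            'type': 'completion',  # backs the completion suggester used above
        },
    },
}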
Code Example #24
File: tasks.py  Project: gedex/zamboni
def index_installed_daily(ids, **kw):
    """
    Takes a list of Installed ids and uses its addon and date fields to index
    stats for that day.
    ids -- ids of mkt.webapps.Installed objects
    """
    from mkt.webapps.models import Installed
    es = elasticutils.get_es()
    # Get Installed's
    qs = (Installed.objects.filter(id__in=set(ids)).
        order_by('-created').values('addon', 'created'))
    log.info('[%s] Indexing %s installed counts for daily stats.' %
             (qs[0]['created'], len(qs)))

    addons_dates = defaultdict(lambda: defaultdict(dict))
    for installed in qs:
        addon = installed['addon']
        date = installed['created'].strftime('%Y%m%d')

        try:
            if not date in addons_dates[addon]:
                key = ord_word('ins' + str(addon) + str(date))
                data = search.get_installed_daily(installed)
                if not already_indexed(Installed, data):
                    Installed.index(data, bulk=True, id=key)
                addons_dates[addon][date] = 0
            es.flush_bulk(forced=True)
        except Exception, exc:
            index_installed_daily.retry(args=[ids], exc=exc)
            raise
Code Example #25
File: es_utils.py  Project: readevalprint/kitsune
def es_reindex_with_progress(percent=100):
    """Rebuild Elastic indexes as you iterate over yielded progress ratios.

    :arg percent: Defaults to 100.  Allows you to specify how much of
        each doctype you want to index.  This is useful for
        development where doing a full reindex takes an hour.

    """
    from search.models import get_search_models

    search_models = get_search_models()

    es = elasticutils.get_es()
    index = settings.ES_INDEXES['default']
    es.delete_index_if_exists(index)
    # There should be no mapping-conflict race here since the index doesn't
    # exist. Live indexing should just fail.

    # Simultaneously create the index and the mappings, so live indexing
    # doesn't get a chance to index anything between the two and infer a bogus
    # mapping (which ES then freaks out over when we try to lay in an
    # incompatible explicit mapping).
    mappings = dict((cls._meta.db_table, {'properties': cls.get_mapping()})
                    for cls in search_models)
    es.create_index(index, settings={'mappings': mappings})

    total = sum([cls.objects.count() for cls in search_models])
    to_index = [cls.index_all(percent) for cls in search_models]
    return (float(done) / total for done, _ in
            izip(count(1), chain(*to_index)))
Code Example #26
File: tasks.py  Project: gkoberger/zamboni
def index_contribution_counts(ids, **kw):
    """
    Contribution stats by addon-date unique pair Uses a nested
    dictionary to not index duplicate contribution with same addon/date
    pairs. For each addon-date, it stores the addon in the dict as a top
    level key with a dict as its value. And it stores the date in the
    addon's dict as a second level key. To check if an addon-date pair has
    been already index, it looks up the dict[addon][date] to see if the
    key exists.
    """
    es = elasticutils.get_es()
    qs = (Contribution.objects.filter(id__in=ids)
          .order_by('created').values('addon', 'created'))

    try:
        addons_dates = defaultdict(lambda: defaultdict(dict))
        for contribution in qs:
            addon = contribution['addon']
            date = contribution['created'].strftime('%Y%m%d')

            # date for addon not processed, index it and give it key
            if not date in addons_dates[addon]:
                key = '%s-%s' % (addon, date)
                data = search.extract_contribution_counts(contribution)
                Contribution.index(data, bulk=True, id=key)
                addons_dates[addon][date] = 0

        if qs:
            log.info('Indexed %s addons/apps for contribution stats: %s' %
                     (len(addons_dates), qs[0]['created']))
        es.flush_bulk(forced=True)
    except Exception, exc:
        index_contribution_counts.retry(args=[ids], exc=exc)
        raise
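Stripped of the indexing calls, the deduplication the docstring describes is just a two-level dictionary used as a seen-set keyed by addon and then by date:

from collections import defaultdict

addons_dates = defaultdict(dict)

def already_processed(addon, date):
    """Return True if this addon/date pair was already indexed this run."""
    if date in addons_dates[addon]:
        return True
    addons_dates[addon][date] = 0  # mark the pair as processed
    return False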
Code Example #27
File: __init__.py  Project: chiehwen/elasticutils
    def setup_class(cls):
        """Class setup for tests.

        Checks to see if ES is running and if not, sets ``skip_test``
        to True on the class.
        """
        # Note: TestCase has no setup_class
        try:
            get_es().health()
        except pyelasticsearch.exceptions.ConnectionError:
            cls.skip_tests = True

        if cls.data:
            cls.create_index(settings={'mappings': cls.mapping})
            cls.index_data(cls.data)
            cls.refresh()
Code Example #28
File: search.py  Project: bebef1987/zamboni
def setup_indexes():
    es = elasticutils.get_es()
    for model in CollectionCount, DownloadCount, UpdateCount:
        index = model._get_index()
        try:
            es.create_index_if_missing(index)
        except pyes.ElasticSearchException:
            pass

    mapping = {
            'properties': {
                'id': {'type': 'long'},
                'count': {'type': 'long'},
                'data': {'dynamic': 'true',
                         'properties': {
                            'v': {'type': 'long'},
                            'k': {'type': 'string'}
                        }
                },
                'date': {'format':'dateOptionalTime',
                         'type':'date'}
            }
    }
    es.put_mapping(CollectionCount._meta.db_table, mapping,
                   CollectionCount._get_index())
Code Example #29
File: views.py  Project: james4388/zamboni
def elastic(request):
    INDEX = site_settings.ES_INDEXES["default"]
    es = elasticutils.get_es()
    mappings = {
        "addons": (addons.search.setup_mapping, addons.cron.reindex_addons),
        "collections": (addons.search.setup_mapping, bandwagon.cron.reindex_collections),
        "compat": (addons.search.setup_mapping, None),
        "users": (addons.search.setup_mapping, users.cron.reindex_users),
    }
    if request.method == "POST":
        if request.POST.get("reset") in mappings:
            name = request.POST["reset"]
            es.delete_mapping(INDEX, name)
            if mappings[name][0]:
                mappings[name][0]()
            messages.info(request, "Resetting %s." % name)
        if request.POST.get("reindex") in mappings:
            name = request.POST["reindex"]
            mappings[name][1]()
            messages.info(request, "Reindexing %s." % name)
        return redirect("zadmin.elastic")

    indexes = set(site_settings.ES_INDEXES.values())
    mappings = es.get_mapping(None, indexes)
    ctx = {
        "nodes": es.cluster_nodes(),
        "health": es.cluster_health(),
        "state": es.cluster_state(),
        "mappings": [(index, mappings.get(index, {})) for index in indexes],
    }
    return jingo.render(request, "zadmin/elastic.html", ctx)
Code Example #30
File: views.py  Project: PinZhang/zamboni
def elastic(request):
    INDEX = site_settings.ES_INDEXES['default']
    es = elasticutils.get_es()
    mappings = {'addons': (addons.search.setup_mapping,
                           addons.cron.reindex_addons),
                'collections': (addons.search.setup_mapping,
                                bandwagon.cron.reindex_collections),
                'compat': (addons.search.setup_mapping, None),
                'users': (addons.search.setup_mapping,
                          users.cron.reindex_users),
               }
    if request.method == 'POST':
        if request.POST.get('reset') in mappings:
            name = request.POST['reset']
            es.delete_mapping(INDEX, name)
            if mappings[name][0]:
                mappings[name][0]()
            messages.info(request, 'Resetting %s.' % name)
        if request.POST.get('reindex') in mappings:
            name = request.POST['reindex']
            mappings[name][1]()
            messages.info(request, 'Reindexing %s.' % name)
        return redirect('zadmin.elastic')

    indexes = set(site_settings.ES_INDEXES.values())
    mappings = es.get_mapping(None, indexes)
    ctx = {
        'nodes': es.cluster_nodes(),
        'health': es.cluster_health(),
        'state': es.cluster_state(),
        'mappings': [(index, mappings.get(index, {})) for index in indexes],
    }
    return jingo.render(request, 'zadmin/elastic.html', ctx)
Code Example #31
File: models.py  Project: trentonstrong/elasticutils
 def unindex(cls, id):
     """Removes a particular item from the search index."""
     elasticutils.get_es().delete(cls._get_index(), cls._meta.db_table, id)
Code Example #32
    def open_spider(self, spider):
        self.es = elasticutils.get_es()

        if self.es.indices.exists(index='doc-index'):
            self.es.indices.delete(index='doc-index')

        self.es.indices.create(index='doc-index',
                               body={
                                   'settings': {
                                       'analysis': {
                                           'filter': {
                                               'en_stop_filter': {
                                                   'type': 'stop',
                                                   'stopwords': ['_english_']
                                               },
                                               'en_stem_filter': {
                                                   'type': 'stemmer',
                                                   'name': 'minimal_english'
                                               }
                                           },
                                           'analyzer': {
                                               'en_analyzer': {
                                                   'type':
                                                   'custom',
                                                   'tokenizer':
                                                   'lowercase',
                                                   'filter': [
                                                       'asciifolding',
                                                       'word_delimiter',
                                                       'en_stop_filter',
                                                       'en_stem_filter'
                                                   ]
                                               }
                                           }
                                       },
                                   },
                                   'mappings': {
                                       'doc-section-type': {
                                           'analyzer': 'en_analyzer',
                                           'url': {
                                               'type': 'string'
                                           },
                                           'category': {
                                               'type': 'string'
                                           },
                                           'tags': {
                                               'type': 'string',
                                               'boost': 1.8
                                           },
                                           'title': {
                                               'type': 'string',
                                               'boost': 1
                                           },
                                           'content': {
                                               'type': 'string'
                                           },
                                           '_boost': {
                                               'name': 'boost',
                                               'null_value': 1.0
                                           }
                                       }
                                   }
                               })

        f = open('tags.json', 'r')
        self.tags = json.loads(f.read())
Code Example #33
 def get_es(cls):
     return get_es(**ESTestCase.es_settings)
Code Example #34
 def teardown_class(cls):
     es = get_es()
     es.delete_index('test')
Code Example #35
 def create_impl(self, config, config_no_sensitive):
     return get_es(config.hosts.splitlines(),
                   float(config.timeout),
                   send_get_body_as=config.body_as)
Code Example #36
 def get_es(cls):
     """Returns the Elasticsearch object specified by ``cls.es_settings``"""
     return get_es(**cls.es_settings)
Code Example #37
 def test_get_es_mocked(self):
     es = elasticutils.get_es()
     assert issubclass(es.__class__, mock.Mock)
Code Example #38
def setup_mapping():
    """Set up the addons index mapping."""
    # Mapping describes how elasticsearch handles a document during indexing.
    # Most fields are detected and mapped automatically.
    appver = {
        'dynamic': False,
        'properties': {
            'max': {
                'type': 'long'
            },
            'min': {
                'type': 'long'
            }
        }
    }
    mapping = {
        # Optional boosting during indexing.
        '_boost': {
            'name': '_boost',
            'null_value': 1.0
        },
        'properties': {
            # Turn off analysis on name so we can sort by it.
            'name_sort': {
                'type': 'string',
                'index': 'not_analyzed'
            },
            # Adding word-delimiter to split on camelcase and punctuation.
            'name': {
                'type': 'string',
                'analyzer': 'standardPlusWordDelimiter'
            },
            'summary': {
                'type': 'string',
                'analyzer': 'snowball'
            },
            'description': {
                'type': 'string',
                'analyzer': 'snowball'
            },
            'tags': {
                'type': 'string',
                'index': 'not_analyzed',
                'index_name': 'tag'
            },
            'platforms': {
                'type': 'integer',
                'index_name': 'platform'
            },
            'appversion': {
                'properties': dict((app.id, appver) for app in amo.APP_USAGE)
            },
        },
    }
    # Add room for language-specific indexes.
    for analyzer in amo.SEARCH_ANALYZER_MAP:
        mapping['properties']['name_' + analyzer] = {
            'type': 'string',
            'analyzer': analyzer,
        }
        mapping['properties']['summary_' + analyzer] = {
            'type': 'string',
            'analyzer': analyzer,
        }
        mapping['properties']['description_' + analyzer] = {
            'type': 'string',
            'analyzer': analyzer,
        }

    es = elasticutils.get_es()
    # Adjust the mapping for all models at once because fields are shared
    # across all doc types in an index. If we forget to adjust one of them
    # we'll get burned later on.
    for model in Addon, AppCompat, Collection, UserProfile:
        index = model._get_index()
        try:
            es.create_index_if_missing(index)
        except pyes.ElasticSearchException:
            pass
        try:
            es.put_mapping(model._meta.db_table, mapping, index)
        except pyes.ElasticSearchException, e:
            log.error(e)
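The `not_analyzed` `name_sort` field exists so results can be ordered lexically without the analyzer splitting the name into tokens. A query body that uses it might look like the following (generic Elasticsearch DSL, for illustration only):

query = {
    'query': {'match': {'name': 'tab manager'}},
    # Sort on the untokenized copy of the name rather than the analyzed field.
    'sort': [{'name_sort': 'asc'}],
}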
Code Example #39
File: __init__.py  Project: mt3/elasticutils
 def get_es(cls):
     return get_es(default_indexes=[cls.index_name])
Code Example #40
    def handle(self, *args, **options):
        self.es = get_es(urls=settings.ES_URLS)

        chunk_size = options.get("chunk")
        index_suffix = options.get("index_suffix")

        if index_suffix:
            index_suffix = "_" + index_suffix

        all_models_to_index = set()
        if len(args):
            for app_name in args:
                for model in models.get_models(models.get_app(app_name)):
                    if issubclass(model, PolymorphicIndexable):
                        all_models_to_index.add(model)
        else:
            for app in models.get_apps():
                for model in models.get_models(app):
                    if issubclass(model, PolymorphicIndexable):
                        all_models_to_index.add(model)

        # remove redundant subclasses since the instance_of query will select them
        models_to_index = set()
        for model_i in all_models_to_index:
            should_add = True
            for model_j in all_models_to_index:
                if model_i != model_j and issubclass(model_i, model_j):
                    should_add = False
                    break
            if should_add:
                models_to_index.add(model_i)

        self.stdout.write(u"Indexing models: %s" % ', '.join([m.__name__ for m in models_to_index]))

        num_processed = 0
        payload = []
        for model in models_to_index:
            for instance in model.objects.instance_of(model).order_by("id").iterator():
                meta = {
                    "index": {
                        "_index": instance.get_index_name() + index_suffix,
                        "_type": instance.get_mapping_type_name(),
                        "_id": instance.pk
                    }
                }
                payload.append(meta)
                doc = instance.extract_document()
                payload.append(doc)
                if len(payload) / 2 == chunk_size:
                    response = self.es.bulk(body=payload)
                    good_items = [item for item in response["items"] if item["index"]["status"] <= 299]
                    if len(good_items) != len(payload) // 2:
                        self.stdout.write("Bulk indexing error! Item count mismatch.")
                        bad_items = [item for item in response["items"] if item["index"]["status"] > 201]
                        self.stdout.write("These were rejected: %s" % str(bad_items))
                        return "Bulk indexing failed."
                    num_processed += (len(payload) / 2)
                    self.stdout.write("Indexed %d items" % num_processed)
                    payload = []

        if payload:
            response = self.es.bulk(body=payload)
            num_processed += (len(payload) / 2)
            self.stdout.write("Indexed %d items" % num_processed)
Code Example #41
File: sample_facets.py  Project: mt3/elasticutils
"""
This is a sample program that uses PyES ES to create an index, create
a mapping, and index some data. Then it uses ElasticUtils S to show
some behavior with facets.
"""

from elasticutils import get_es, S

HOST = 'localhost:9200'
INDEX = 'fooindex'
DOCTYPE = 'testdoc'

es = get_es(hosts=HOST, default_indexes=[INDEX])

# This uses pyes ES.delete_index_if_exists.
es.delete_index_if_exists(INDEX)

# Define the mapping for the doctype 'testdoc'. It's got an id field,
# a title which is analyzed, and two fields that are lists of tags, so
# we don't want to analyze them.
#
# Note: The alternative for the tags is to analyze them and use the
# 'keyword' analyzer. Both not analyzing and using the keyword
# analyzer treats the values as a single term rather than tokenizing
# them and treating as multiple terms.
mapping = {
    DOCTYPE: {
        'properties': {
            'id': {
                'type': 'integer'
            },
Code Example #42
File: tasks.py  Project: vdt/zamboni
def index_users(ids, **kw):
    es = elasticutils.get_es()
    task_log.debug('Indexing users %s-%s [%s].' % (ids[0], ids[-1], len(ids)))
    for c in UserProfile.objects.filter(id__in=ids):
        UserProfile.index(search.extract(c), bulk=True, id=c.id)
    es.flush_bulk(forced=True)
Code Example #43
 def unindex(cls, id):
     elasticutils.get_es().delete(settings.ES_INDEX, cls._meta.app_label,
                                  id)
Code Example #44
    def test_bulk_index(self):
        ParentIndexable(foo="Fighters").save(index=False)
        ChildIndexable(foo="Fighters", bar=69).save(index=False)

        GrandchildIndexable(foo="Fighters",
                            bar=69,
                            baz=datetime.datetime.now() -
                            datetime.timedelta(hours=1)).save(index=False)

        SeparateIndexable(junk="Testing").save(index=False)

        # Let's make sure that nothing is indexed yet.
        self.assertEqual(ParentIndexable.search_objects.s().count(), 0)
        self.assertEqual(SeparateIndexable.search_objects.s().count(), 0)

        # Now that everything has been made, let's try a bulk_index.
        call_command("bulk_index")
        ParentIndexable.search_objects.refresh()
        SeparateIndexable.search_objects.refresh()

        # Let's make sure that everything has the right counts
        self.assertEqual(ParentIndexable.search_objects.s().count(), 3)
        self.assertEqual(SeparateIndexable.search_objects.s().count(), 1)

        # Let's add another one, make sure the counts are right.
        ParentIndexable(foo="Mr. T").save(index=False)
        self.assertEqual(ParentIndexable.search_objects.s().count(), 3)
        call_command("bulk_index")
        ParentIndexable.search_objects.refresh()
        self.assertEqual(ParentIndexable.search_objects.s().count(), 4)

        # Let's f**k up some data in ES.
        obj = ParentIndexable.objects.all()[0]
        es = get_es(urls=settings.ES_URLS)
        doc = obj.extract_document()
        doc["foo"] = "DATA LOVERS"
        es.update(index=obj.get_index_name(),
                  doc_type=obj.get_mapping_type_name(),
                  id=obj.id,
                  body=dict(doc=doc, doc_as_upsert=True),
                  refresh=True)

        # Make sure the bad data works
        self.assertEqual(
            ParentIndexable.search_objects.query(
                foo__match="DATA LOVERS").count(), 1)
        call_command("bulk_index")
        ParentIndexable.search_objects.refresh()
        self.assertEqual(
            ParentIndexable.search_objects.query(
                foo__match="DATA LOVERS").count(), 0)

        # Let's delete an item from the db.
        obj = ParentIndexable.objects.all()[0]
        obj.delete()

        # Make sure the count is the same
        self.assertEqual(ParentIndexable.search_objects.s().count(), 4)

        # This shouldn't remove the item
        call_command("bulk_index")
        ParentIndexable.search_objects.refresh()
        self.assertEqual(ParentIndexable.search_objects.s().count(), 4)

        # This should
        call_command("synces", self.index_suffix, drop_existing_indexes=True)
        call_command("es_swap_aliases", self.index_suffix)
        call_command("bulk_index")
        ParentIndexable.search_objects.refresh()
        self.assertEqual(ParentIndexable.search_objects.s().count(), 3)
Code Example #45
            libraries_results.append(('Spidermonkey is ready!', True, None))
            # TODO: see if it works?
        else:
            status_summary['libraries'] = False
            msg = "You said it was at (%s)" % settings.SPIDERMONKEY
            libraries_results.append(('Spidermonkey not found!', False, msg))
    else:
        status_summary['libraries'] = False
        msg = "Please set SPIDERMONKEY in your settings file."
        libraries_results.append(("Spidermonkey isn't set up.", False, msg))

    elastic_results = None
    if settings.USE_ELASTIC:
        status_summary['elastic'] = False
        try:
            health = elasticutils.get_es().cluster_health()
            status_summary['elastic'] = health['status'] != 'red'
            elastic_results = health
        except Exception:
            elastic_results = traceback.format_exc()

    # Check file paths / permissions
    rw = (settings.TMP_PATH, settings.NETAPP_STORAGE, settings.UPLOADS_PATH,
          settings.ADDONS_PATH, settings.MIRROR_STAGE_PATH,
          settings.GUARDED_ADDONS_PATH, settings.ADDON_ICONS_PATH,
          settings.COLLECTIONS_ICON_PATH, settings.PREVIEWS_PATH,
          settings.USERPICS_PATH, settings.SPHINX_CATALOG_PATH,
          settings.SPHINX_LOG_PATH, dump_apps.Command.JSON_PATH)
    r = [os.path.join(settings.ROOT, 'locale')]
    filepaths = [(path, os.R_OK | os.W_OK, "We want read + write")
                 for path in rw]
Code Example #46
File: index_utils.py  Project: mariot/datawinners
def get_elasticsearch_handle(timeout=ELASTIC_SEARCH_TIMEOUT):
    return elasticutils.get_es(urls=ELASTIC_SEARCH_URL, timeout=timeout)
Code Example #47
File: unique_id.py  Project: venumurthy/datawinners
def _delete_unique_id_from_elastic_search(dbm, entity_type, document_id):
    elasticutils.get_es(urls=ELASTIC_SEARCH_URL,
                        timeout=ELASTIC_SEARCH_TIMEOUT).delete(
                            dbm.database_name, entity_type, document_id)
Code Example #48
import yaml

from elasticutils import get_es, S

# def create_mapping():
#      a = open('schema/deals.yaml')
#      b = yaml.load(a)
#      a.close()
#      return b

#mapping = create_mapping()
#fileformat = [{'company_name': 'homedepot', 'active': [{'20% off': 'ZYZZ', '50% off': 'REDDIT'}], 'inactive': [{'-10% off': 'DIVIDEBYZERO'}]}, 
#             {'company_name': 'lowes', 'active': [{'15% off': 'XCX', '100% off': 'HACKERNEWS'}], 'inactive': [{'Buy one get one': 'BOGO'}]},]
mapping = {'companies': {'properties': {'company_name': {'type': 'string'}, 'active': {'type': 'string'}, 'inactive': {'type': 'string'},}}}

es = get_es(hosts='localhost:9200', default_indexes=['dealsindex'])

def get_data_from_yaml():
    data = {}
    dataList = []
    a = glob.iglob("data/*.yaml")
    for file in a:
        b = open(file)
        c = yaml.load(b)
        dataList.append(c)
        b.close()
    # Elasticsearch wants a list of dictionaries, hence the conversion
    return dataList

def create_and_insert():
    es.delete_index_if_exists('dealsindex')
Code Example #49
"""
This is a sample program that uses Elasticsearch (from elasticsearch-py)
object to create an index, create a mapping, and index some data. Then
it uses ElasticUtils S to show some behavior.
"""

from elasticutils import get_es, S

from elasticsearch.helpers import bulk_index

URL = 'localhost'
INDEX = 'fooindex'
DOCTYPE = 'testdoc'

# This creates an elasticsearch.Elasticsearch object which we can use
# to do all our indexing.
es = get_es(urls=[URL])

# First, delete the index if it exists.
es.indices.delete(index=INDEX, ignore=404)

# Define the mapping for the doctype 'testdoc'. It's got an id field,
# a title which is analyzed, and two fields that are lists of tags, so
# we don't want to analyze them.
mapping = {
    DOCTYPE: {
        'properties': {
            'id': {
                'type': 'integer'
            },
            'title': {
                'type': 'string',
Code Example #50
File: unique_id.py  Project: venumurthy/datawinners
def _refresh_elastic_search_index(dbm):
    elasticutils.get_es(
        urls=ELASTIC_SEARCH_URL,
        timeout=ELASTIC_SEARCH_TIMEOUT).refresh(index=dbm.database_name)
Code Example #51
def index_collections(ids, **kw):
    es = elasticutils.get_es()
    log.debug('Indexing collections %s-%s [%s].' % (ids[0], ids[-1], len(ids)))
    for c in Collection.objects.filter(id__in=ids):
        Collection.index(search.extract(c), bulk=True, id=c.id)
    es.flush_bulk(forced=True)
Code Example #52
File: __init__.py  Project: jasonthomas/kitsune
    def teardown_indexes(self):
        es = get_es()
        for index in settings.ES_INDEXES.values():
            es.delete_index_if_exists(index)

        settings.ES_LIVE_INDEXING = False
Code Example #53
 def index(cls, document, id=None, bulk=False, force_insert=False):
     """Wrapper around pyes.ES.index."""
     elasticutils.get_es().index(
         document, index=cls._get_index(), doc_type=cls._meta.db_table,
         id=id, bulk=bulk, force_insert=force_insert)
Code Example #54
 def get_es(cls):
     return get_es(**cls.es_settings)
Code Example #55
File: base.py  Project: theonion/django-elastimorphic
 def setUp(self):
     self.index_suffix = "vtest"
     self.es = get_es(urls=settings.ES_URLS)
     call_command("synces", self.index_suffix, drop_existing_indexes=True)
     call_command("es_swap_aliases", self.index_suffix)
Code Example #56
def compatibility_report():
    redis = redisutils.connections['master']
    docs = defaultdict(dict)

    # Gather all the data for the index.
    for app in amo.APP_USAGE:
        versions = [c for c in settings.COMPAT if c['app'] == app.id]

        log.info(u'Making compat report for %s.' % app.pretty)
        latest = UpdateCount.objects.aggregate(d=Max('date'))['d']
        qs = UpdateCount.objects.filter(addon__appsupport__app=app.id,
                                        addon__disabled_by_user=False,
                                        addon__status__in=amo.VALID_STATUSES,
                                        addon___current_version__isnull=False,
                                        date=latest)

        updates = dict(qs.values_list('addon', 'count'))
        for chunk in amo.utils.chunked(updates.items(), 50):
            chunk = dict(chunk)
            for addon in Addon.objects.filter(id__in=chunk):
                doc = docs[addon.id]
                doc.update(id=addon.id,
                           slug=addon.slug,
                           guid=addon.guid,
                           self_hosted=addon.is_selfhosted(),
                           binary=addon.binary_components,
                           name=unicode(addon.name),
                           created=addon.created,
                           current_version=addon.current_version.version,
                           current_version_id=addon.current_version.pk)
                doc['count'] = chunk[addon.id]
                doc.setdefault('top_95',
                               defaultdict(lambda: defaultdict(dict)))
                doc.setdefault('top_95_all', {})
                doc.setdefault('usage', {})[app.id] = updates[addon.id]
                doc.setdefault('works', {}).setdefault(app.id, {})

                # Populate with default counts for all app versions.
                for ver in versions:
                    doc['works'][app.id][vint(ver['main'])] = {
                        'success': 0,
                        'failure': 0,
                        'total': 0,
                        'failure_ratio': 0.0,
                    }

                # Group reports by `major`.`minor` app version.
                reports = (CompatReport.objects.filter(
                    guid=addon.guid, app_guid=app.guid).values_list(
                        'app_version', 'works_properly').annotate(Count('id')))
                for ver, works_properly, cnt in reports:
                    ver = vint(floor_version(ver))
                    major = [
                        v['main'] for v in versions
                        if vint(v['previous']) < ver <= vint(v['main'])
                    ]
                    if major:
                        w = doc['works'][app.id][vint(major[0])]
                        # Tally number of success and failure reports.
                        w['success' if works_properly else 'failure'] += cnt
                        w['total'] += cnt
                        # Calculate % of incompatibility reports.
                        w['failure_ratio'] = w['failure'] / float(w['total'])

                if app not in addon.compatible_apps:
                    continue
                compat = addon.compatible_apps[app]
                d = {
                    'min': compat.min.version_int,
                    'max': compat.max.version_int
                }
                doc.setdefault('support', {})[app.id] = d
                doc.setdefault('max_version', {})[app.id] = compat.max.version

        total = sum(updates.values())
        # Remember the total so we can show % of usage later.
        redis.hset('compat:%s' % app.id, 'total', total)

        # Figure out which add-ons are in the top 95% for this app.
        running_total = 0
        for addon, count in sorted(updates.items(),
                                   key=lambda x: x[1],
                                   reverse=True):
            running_total += count
            docs[addon]['top_95_all'][app.id] = running_total < (.95 * total)

    # Mark the top 95% of add-ons compatible with the previous version for each
    # app + version combo.
    for compat in settings.COMPAT:
        app, ver = compat['app'], vint(compat['previous'])
        # Find all the docs that have a max_version compatible with ver.
        supported = [
            doc for doc in docs.values() if app in doc.get('support', {})
            and doc['support'][app]['max'] >= ver
        ]
        # Sort by count so we can get the top 95% most-used add-ons.
        supported = sorted(supported, key=lambda d: d['count'], reverse=True)
        total = sum(doc['count'] for doc in supported)
        # Figure out which add-ons are in the top 95% for this app + version.
        running_total = 0
        for doc in supported:
            running_total += doc['count']
            doc['top_95'][app][ver] = running_total < (.95 * total)

    # Send it all to the index.
    for chunk in amo.utils.chunked(docs.values(), 150):
        for doc in chunk:
            AppCompat.index(doc, id=doc['id'], bulk=True)
        elasticutils.get_es().flush_bulk(forced=True)
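The "top 95%" flags are computed with a running total over usage counts sorted in descending order: an entry is flagged while the cumulative count stays under 95% of the grand total. In isolation the calculation is just:

def top_95_flags(counts):
    """Map each key to True while its cumulative share stays under 95%."""
    total = sum(counts.values())
    flags, running_total = {}, 0
    for key, count in sorted(counts.items(), key=lambda x: x[1], reverse=True):
        running_total += count
        flags[key] = running_total < 0.95 * total
    return flags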
Code Example #57
 def es(self):
     """Returns an elasticsearch object, using the ES URL from the Django settings"""
     return get_es(urls=settings.ES_URLS)
Code Example #58
File: 264-locale-indexes.py  Project: zzdjk6/zamboni
def columns():
    es = elasticutils.get_es()
    index = settings.ES_INDEXES['default']
    return es.get_mapping('addons', index)['addons']['properties'].keys()
Code Example #59
 def get_es(cls):
     return get_es(urls=settings.ES_URLS)
Code Example #60
File: models.py  Project: writefaruq/zamboni
 def unindex(cls, id):
     elasticutils.get_es().delete(cls._get_index(), cls._meta.db_table, id)