Example #1
    def setUp(self):
        """Setup API Testing."""
        # create an APP
        self.auto_user = user()
        up = self.auto_user.userprofile
        up.set_membership(Group, 'nice guy')
        up.set_membership(Skill, 'python')
        up.set_membership(Language, 'Greek')
        up.ircname = 'foobar'
        up.country = 'gr'
        up.region = 'Attika'
        up.city = 'Athens'
        up.save()
        self.auto_user.first_name = 'Foo'
        self.auto_user.last_name = 'Bar'
        self.auto_user.save()

        self.app = APIApp.objects.create(name='test_app',
                                         description='Foo',
                                         owner=self.mozillian,
                                         is_mozilla_app=False,
                                         is_active=False)

        index_all_profiles()
        get_es().flush(refresh=True)
Example #2
    def setUp(self):
        """Setup API Testing."""
        # create an APP
        self.auto_user = user()
        up = self.auto_user.userprofile
        up.set_membership(Group, 'nice guy')
        up.set_membership(Skill, 'python')
        up.set_membership(Language, 'Greek')
        up.ircname = 'foobar'
        up.country = 'gr'
        up.region = 'Attika'
        up.city = 'Athens'
        up.save()
        self.auto_user.first_name = 'Foo'
        self.auto_user.last_name = 'Bar'
        self.auto_user.save()

        self.app = APIApp.objects.create(name='test_app',
                                         description='Foo',
                                         owner=self.mozillian,
                                         is_mozilla_app=False,
                                         is_active=False)

        index_all_profiles()
        get_es().flush(refresh=True)
Example #3
    def index(cls, document, id=None, bulk=False, force_insert=False,
              index=None):
        """Wrapper around pyes.ES.index."""
        elasticutils.get_es().index(
            document, index=index or cls._get_index(),
            doc_type=cls._meta.db_table, id=id, bulk=bulk,
            force_insert=force_insert)
Example #4
    def index(cls, document, id=None, bulk=False, force_insert=False):
        """Wrapper around pyes.ES.index."""
        elasticutils.get_es().index(document,
                                    index=cls._get_index(),
                                    doc_type=cls._meta.db_table,
                                    id=id,
                                    bulk=bulk,
                                    force_insert=force_insert)
Example #5
    def refresh(self, timesleep=0):
        index = es_utils.WRITE_INDEX

        # Any time we're doing a refresh, we're making sure that the
        # index is ready to be queried.  Given that, it's almost
        # always the case that we want to run all the generated tasks,
        # then refresh.
        generate_tasks()

        get_es().refresh(index, timesleep=timesleep)
Example #6
    def setUpClass(cls):
        """Runs the :class:`TestCase` setup to add some data.

        Also flushes and refreshes the data so it's searchable via
        computer.

        """
        estestcase.ESTestCase.setUpClass()
        TestCase.setUpClass()
        get_es().flush(refresh=True)
Example #7
    def setUpClass(cls):
        """Runs the :class:`TestCase` setup to add some data.

        Also flushes and refreshes the data so it's searchable via
        computer.

        """
        estestcase.ESTestCase.setUpClass()
        TestCase.setUpClass()
        get_es().flush(refresh=True)
Example #8
    def refresh(self, run_tasks=True):
        index = es_utils.WRITE_INDEX

        if run_tasks:
            # Any time we're doing a refresh, we're making sure that
            # the index is ready to be queried. Given that, it's
            # almost always the case that we want to run all the
            # generated tasks, then refresh.
            generate_tasks()

        get_es().refresh(index)
        get_es().health(wait_for_status='yellow')
Example #9
def delete_index_if_exists(index):
    """Delete the specified index.

    :arg index: The name of the index to delete.

    """
    try:
        get_es().delete_index(index)
    except ElasticHttpNotFoundError:
        # Can ignore this since it indicates the index doesn't exist
        # and therefore there's nothing to delete.
        pass
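The helper above swallows the not-found case so callers can drop an index unconditionally. A minimal usage sketch, assuming the helper and get_es() are importable from a module such as search.es_utils (path hypothetical) and that get_es() returns the same pyelasticsearch-style client used in the surrounding examples:

from search.es_utils import delete_index_if_exists, get_es  # hypothetical import path

def reset_index(index, mappings):
    # Safe to call even when the index does not exist yet.
    delete_index_if_exists(index)
    # Recreate it with explicit mappings, as recreate_index() does elsewhere on this page.
    get_es().create_index(index, settings={'mappings': mappings})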
Example #10
    def refresh(self, run_tasks=True):
        index = es_utils.WRITE_INDEX

        if run_tasks:
            # Any time we're doing a refresh, we're making sure that
            # the index is ready to be queried. Given that, it's
            # almost always the case that we want to run all the
            # generated tasks, then refresh.
            generate_tasks()

        get_es().refresh(index)
        get_es().health(wait_for_status='yellow')
Example #11
    def setUpClass(cls):
        super(ElasticTestCase, cls).setUpClass()

        if not getattr(settings, 'ES_URLS', None):
            cls.skipme = True
            return

        # try to connect to ES and if it fails, skip ElasticTestCases.
        try:
            get_es().health()
        except es_utils.ES_EXCEPTIONS:
            cls.skipme = True
            return
Example #12
    def setUpClass(cls):
        super(ElasticTestCase, cls).setUpClass()

        if not getattr(settings, 'ES_URLS', None):
            cls.skipme = True
            return

        # try to connect to ES and if it fails, skip ElasticTestCases.
        try:
            get_es().health()
        except es_utils.ES_EXCEPTIONS:
            cls.skipme = True
            return
Example #13
    def handle(self, *args, **options):
        url = options['url']
        if url:
            es = get_es(urls=[url])
        else:
            es = get_es()
        # We define some custom analyzers that our mappings can use.
        index_settings = {'mappings': {}, 'settings': get_analyzers()}

        # Retrieve the mappings for the index-enabled models.
        for mappingClass in self.MAPPINGS:
            model_name = mappingClass.get_mapping_type_name()
            index_settings['mappings'].update({model_name: mappingClass.get_mapping()})

        # Create a new index.
        new_index = 'index_%s' % (int(time.time()))
        es.indices.create(new_index, body=index_settings)
        self.index(new_index)

        # The default index name (which we will use as an alias).
        index_name = settings.ES_INDEXES['default']

        # Check if we have a current index.
        old_index = None
        aliases = es.indices.get_aliases(name=index_name)
        for key, value in aliases.iteritems():
            if value['aliases']:
                old_index = key

        # Change the alias to point to our new index, and remove the old index.

        self.stdout.write('Changing alias "%s" from old index "%s" to new index "%s"' %
                          (index_name, old_index, new_index))
        if old_index:
            es.indices.update_aliases({'actions':
                                       [{'remove': {'index': old_index, 'alias': index_name}},
                                        {'add': {'index': new_index, 'alias': index_name}}]})
            es.indices.delete(old_index)
        else:
            if es.indices.exists(index_name):
                # This is a corner case: there was no alias named index_name, but
                # an index called index_name nevertheless exists. This only happens
                # when the index was already created (by ES's auto-creation feature).
                es.indices.delete(index_name)
            es.indices.update_aliases({'actions':
                                       [{'add': {'index': new_index, 'alias': index_name}}]})

        # Finally, re-index one more time to pick up updates written while the command ran.
        # Note that models that do not use the DeletedMixin will not work this way.
        self.index(index_name)
        self.unindex(index_name)
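The alias bookkeeping in the command above boils down to one update_aliases call. A hedged sketch, assuming es is the elasticsearch-py style client returned by get_es() in this example; the helper name is hypothetical:

def swap_alias(es, alias, new_index, old_index=None):
    # Remove the alias from the old index (if any) and add it to the new one in a
    # single update_aliases call, so searches never see the alias disappear.
    actions = []
    if old_index:
        actions.append({'remove': {'index': old_index, 'alias': alias}})
    actions.append({'add': {'index': new_index, 'alias': alias}})
    es.indices.update_aliases({'actions': actions})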
Example #14
    def test_remove_index(self):
        # Putting a test_amo index in the way.
        es = elasticutils.get_es()

        for index in es.get_indices().keys():
            for prefix in ('test_amo', 'test_amo_stats'):
                if index.startswith(prefix + '-'):
                    es.delete_alias(prefix, [index])
                    es.delete_index(index)
                    es.create_index(prefix)

        # reindexing the first app
        self.webapp.save()
        self.refresh()

        # now doing a reindexation in a background process
        args = [
            sys.executable, 'manage.py', 'reindex', '--prefix=test_',
            '--settings=%s' % self.settings
        ]

        indexer = subprocess.Popen(args,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   cwd=settings.ROOT)
        stdout, stderr = indexer.communicate()
        self.assertTrue('Reindexation done' in stdout, stdout + '\n' + stderr)
Example #15
def recreate_index(es=None):
    """Deletes index if it's there and creates a new one"""
    if es is None:
        es = get_es()

    from search.models import get_search_models

    search_models = get_search_models()
    merged_mapping = {
        SUMO_DOCTYPE: {
            'properties': merge_mappings(
                [(cls._meta.db_table, cls.get_mapping())
                 for cls in search_models])
            }
        }

    index = WRITE_INDEX
    delete_index(index)

    # There should be no mapping-conflict race here since the index doesn't
    # exist. Live indexing should just fail.

    # Simultaneously create the index and the mappings, so live
    # indexing doesn't get a chance to index anything between the two
    # and infer a bogus mapping (which ES then freaks out over when we
    # try to lay in an incompatible explicit mapping).

    es.create_index(index, settings={'mappings': merged_mapping})
Example #16
def index_finance_total_by_currency(addons, **kw):
    """
    Bug 757581
    Total finance stats, currency breakdown.
    """
    es = elasticutils.get_es()
    log.info('Indexing total financial stats by currency for %s apps.' %
             len(addons))

    for addon in addons:
        # Get all contributions for given add-on.
        qs = Contribution.objects.filter(addon=addon, uuid=None)
        if not qs.exists():
            continue

        # Get list of distinct currencies.
        currencies = set(qs.values_list('currency', flat=True))

        for currency in currencies:
            try:
                key = ord_word('cur' + str(addon) + currency.lower())
                data = search.get_finance_total(qs,
                                                addon,
                                                'currency',
                                                currency=currency)
                if not already_indexed(Contribution, data):
                    Contribution.index(data, bulk=True, id=key)
                es.flush_bulk(forced=True)
            except Exception, exc:
                index_finance_total_by_currency.retry(args=[addons], exc=exc)
                raise
Example #17
def setup_mkt_indexes(index=None, aliased=True):
    """
    Define explicit ES mappings for models. If a field is not explicitly
    defined and a field is inserted, ES will dynamically guess the type and
    insert it, in a schemaless manner.
    """
    es = elasticutils.get_es()
    for model in [Contribution, InappPayment]:
        index = index or model._get_index()
        index = create_es_index_if_missing(index, aliased=aliased)

        mapping = {
            'properties': {
                'id': {'type': 'long'},
                'date': {'format': 'dateOptionalTime',
                         'type': 'date'},
                'count': {'type': 'long'},
                'revenue': {'type': 'double'},

                # Tell ES not to 'analyze' the field, so querying with
                # hyphens and lowercase letters works.
                'currency': {'type': 'string',
                             'index': 'not_analyzed'},
                'source': {'type': 'string',
                           'index': 'not_analyzed'},
                'inapp': {'type': 'string',
                          'index': 'not_analyzed'}
            }
        }

        es.put_mapping(model._meta.db_table, mapping, index)
Example #18
def index_all_profiles():
    # Get an es object, delete index and re-create it
    es = get_es(timeout=settings.ES_INDEXING_TIMEOUT)
    mappings = {'mappings':
                {UserProfileMappingType.get_mapping_type_name():
                 UserProfileMappingType.get_mapping()}}

    def _recreate_index(index):
        es.indices.delete(index=index, ignore=[400, 404])
        es.indices.create(index, body=mappings)

    _recreate_index(settings.ES_INDEXES['default'])
    _recreate_index(settings.ES_INDEXES['public'])

    # mozillians index
    ids = UserProfile.objects.complete().values_list('id', flat=True)
    ts = [index_objects.subtask(kwargs={'mapping_type': UserProfileMappingType,
                                        'ids': ids,
                                        'chunk_size': 150,
                                        'public_index': False})]

    # public index
    ids = (UserProfile.objects.complete().public_indexable()
           .privacy_level(PUBLIC).values_list('id', flat=True))
    ts += [index_objects.subtask(kwargs={'mapping_type': UserProfileMappingType,
                                         'ids': ids,
                                         'chunk_size': 150,
                                         'public_index': True})]

    TaskSet(ts).apply_async()
Example #19
def recreate_index(es=None):
    """Deletes index if it's there and creates a new one"""
    if es is None:
        es = get_es()

    from search.models import get_search_models

    search_models = get_search_models()
    merged_mapping = {
        SUMO_DOCTYPE: {
            'properties':
            merge_mappings([(cls._meta.db_table, cls.get_mapping())
                            for cls in search_models])
        }
    }

    index = WRITE_INDEX
    delete_index(index)

    # There should be no mapping-conflict race here since the index doesn't
    # exist. Live indexing should just fail.

    # Simultaneously create the index and the mappings, so live
    # indexing doesn't get a chance to index anything between the two
    # and infer a bogus mapping (which ES then freaks out over when we
    # try to lay in an incompatible explicit mapping).

    es.create_index(index, settings={'mappings': merged_mapping})
Example #20
def index_installed_daily(ids, **kw):
    """
    Takes a list of Installed ids and uses its addon and date fields to index
    stats for that day.
    ids -- ids of mkt.webapps.Installed objects
    """
    from mkt.webapps.models import Installed
    es = elasticutils.get_es()
    # Get Installed's
    qs = (Installed.objects.filter(
        id__in=set(ids)).order_by('-created').values('addon', 'created'))
    log.info('[%s] Indexing %s installed counts for daily stats.' %
             (qs[0]['created'], len(qs)))

    addons_dates = defaultdict(lambda: defaultdict(dict))
    for installed in qs:
        addon = installed['addon']
        date = installed['created'].strftime('%Y%m%d')

        try:
            if date not in addons_dates[addon]:
                key = ord_word('ins' + str(addon) + str(date))
                data = search.get_installed_daily(installed)
                if not already_indexed(Installed, data):
                    Installed.index(data, bulk=True, id=key)
                addons_dates[addon][date] = 0
            es.flush_bulk(forced=True)
        except Exception, exc:
            index_installed_daily.retry(args=[ids], exc=exc)
            raise
Example #21
def index_finance_total_by_src(addons, **kw):
    """
    Bug 758059
    Total finance stats, source breakdown.
    """
    es = elasticutils.get_es()
    log.info('Indexing total financial stats by source for %s apps.' %
             len(addons))

    for addon in addons:
        # Get all contributions for given add-on.
        qs = Contribution.objects.filter(addon=addon, uuid=None)
        if not qs.exists():
            continue

        # Get list of distinct sources.
        sources = set(qs.values_list('source', flat=True))

        for source in sources:
            try:
                key = ord_word('src' + str(addon) + str(source))
                data = search.get_finance_total(qs,
                                                addon,
                                                'source',
                                                source=source)
                if not already_indexed(Contribution, data):
                    Contribution.index(data, bulk=True, id=key)
                es.flush_bulk(forced=True)
            except Exception, exc:
                index_finance_total_by_src.retry(args=[addons], exc=exc)
                raise
Example #22
def index_all_profiles():
    # Get an es object, delete index and re-create it
    es = get_es(timeout=settings.ES_INDEXING_TIMEOUT)
    mappings = {
        'mappings': {
            UserProfileMappingType.get_mapping_type_name():
            UserProfileMappingType.get_mapping()
        }
    }

    def _recreate_index(index):
        es.indices.delete(index=index, ignore=[400, 404])
        es.indices.create(index, body=mappings)

    _recreate_index(settings.ES_INDEXES['default'])
    _recreate_index(settings.ES_INDEXES['public'])

    # mozillians index
    ids = UserProfile.objects.complete().values_list('id', flat=True)
    ts = [
        index_objects.subtask(args=[UserProfileMappingType, chunk, 150, False])
        for chunk in chunked(sorted(list(ids)), 150)
    ]

    # public index
    ts += [
        index_objects.subtask(args=[UserProfileMappingType, chunk, 150, True])
        for chunk in chunked(sorted(list(ids)), 150)
    ]

    TaskSet(ts).apply_async()
Example #23
def setup_mkt_indexes():
    """
    Define explicit ES mappings for models. If a field is not explicitly
    defined and a field is inserted, ES will dynamically guess the type and
    insert it, in a schemaless manner.
    """
    es = elasticutils.get_es()
    for model in [Contribution, InappPayment]:
        index = model._get_index()
        create_es_index_if_missing(index)

        mapping = {
            'properties': {
                'id': {'type': 'long'},
                'date': {'format': 'dateOptionalTime',
                         'type': 'date'},
                'count': {'type': 'long'},
                'revenue': {'type': 'double'},

                # Tell ES not to 'analyze' the field, so querying with
                # hyphens and lowercase letters works.
                'currency': {'type': 'string',
                             'index': 'not_analyzed'},
                'source': {'type': 'string',
                           'index': 'not_analyzed'},
                'inapp': {'type': 'string',
                          'index': 'not_analyzed'}
            }
        }

        es.put_mapping(model._meta.db_table, mapping,
                       model._get_index())
Example #24
    def __init__(self, *args, **kwargs):
        from elasticutils.contrib.django import get_es
        es = get_es()

        es.indices.create(index=ES_INDEXES['default'], ignore=400)
        es.indices.create(index=ES_INDEXES['public'], ignore=400)
        super(TestCase, self).__init__(*args, **kwargs)
Example #25
def setup_mkt_indexes(index=None, aliased=True):
    """
    Define explicit ES mappings for models. If a field is not explicitly
    defined and a field is inserted, ES will dynamically guess the type and
    insert it, in a schemaless manner.
    """
    es = elasticutils.get_es()
    for model in [Contribution, InappPayment]:
        index = index or model._get_index()
        index = create_es_index_if_missing(index, aliased=aliased)

        mapping = {
            "properties": {
                "id": {"type": "long"},
                "date": {"format": "dateOptionalTime", "type": "date"},
                "count": {"type": "long"},
                "revenue": {"type": "double"},
                # Tell ES not to 'analyze' the field, so querying with
                # hyphens and lowercase letters works.
                "currency": {"type": "string", "index": "not_analyzed"},
                "source": {"type": "string", "index": "not_analyzed"},
                "inapp": {"type": "string", "index": "not_analyzed"},
            }
        }

        es.put_mapping(model._meta.db_table, mapping, index)
Example #26
    def handle(self, index_suffix, **options):
        index_suffix = '_' + index_suffix
        indexes = {}
        for name, model in polymorphic_indexable_registry.all_models.items():
            alias = model.get_index_name()
            index = alias + index_suffix
            if alias not in indexes:
                indexes[alias] = index

        es = get_es()
        alias_actions = []
        # remove existing indexes using the aliases we want
        existing_aliases = es.indices.get_aliases()
        for index, aliases in existing_aliases.items():
            for alias, new_index in indexes.items():
                if "aliases" in aliases:
                    if alias in aliases["aliases"]:
                        alias_actions.append({
                            "remove": {
                                "alias": alias,
                                "index": index
                            }
                        })
        # add our new aliases
        for alias, index in indexes.items():
            alias_actions.append({
                "add": {
                    "alias": alias,
                    "index": index
                }
            })
        es.indices.update_aliases(body=dict(actions=alias_actions))
Example #27
def index_chunk(cls, id_list, es=None):
    """Index a chunk of documents.

    :arg cls: The MappingType class.
    :arg id_list: Iterable of ids of that MappingType to index.
    :arg es: The ES to use. Defaults to creating a new indexing ES.

    """
    if es is None:
        es = get_es()

    for ids in chunked(id_list, 200):
        documents = []
        obj_list = cls.get_model().uncached.filter(id__in=ids)
        documents = [
            cls.extract_document(obj_id=obj.id, obj=obj) for obj in obj_list
        ]

        if documents:
            cls.bulk_index(documents, id_field='id', es=es)

    if settings.DEBUG:
        # Nix queries so that this doesn't become a complete
        # memory hog and make Will's computer sad when DEBUG=True.
        reset_queries()
Example #28
    def index(cls, document, id_=None, bulk=False, force_insert=False,
              es=None):
        """Adds or updates a document to the index

        :arg document: Python dict of key/value pairs representing
            the document

            .. Note::

               This must be serializable into JSON.

        :arg id_: the Django ORM model instance id---this is used to
            convert an ES search result back to the Django ORM model
            instance from the db. It should be an integer.
        :arg bulk: Whether or not this is part of a bulk indexing.  If
            this is, you must provide an ES with the `es` argument,
            too.
        :arg force_insert: TODO
        :arg es: The ES to use. If you don't specify an ES, it'll
            use `elasticutils.contrib.django.get_es()`.

        :raises ValueError: if `bulk` is True, but `es` is None.

        TODO: add example.

        """
        if bulk and es is None:
            raise ValueError('bulk is True, but es is None')

        if es is None:
            es = get_es()

        es.index(
            document, index=cls.get_index(), doc_type=cls.get_mapping_type(),
            id=id_, bulk=bulk, force_insert=force_insert)
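The docstring above still reads "TODO: add example". A possible usage sketch, with BlogPostMappingType as a hypothetical MappingType subclass and documents as a list of dicts built elsewhere:

# Single document: id_ ties the ES hit back to the Django model instance.
BlogPostMappingType.index({'id': 42, 'title': 'hello world'}, id_=42)

# Bulk indexing must share one explicit ES, otherwise ValueError is raised.
es = get_es()
for doc in documents:
    BlogPostMappingType.index(doc, id_=doc['id'], bulk=True, es=es)
es.flush_bulk(forced=True)  # as the bulk-indexing tasks on this page do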
Example #29
def index_chunk(cls, id_list, reraise=False, es=None):
    """Index a chunk of documents.

    :arg cls: The MappingType class.
    :arg id_list: Iterable of ids of that MappingType to index.
    :arg reraise: False if you want errors to be swallowed and True
        if you want errors to be thrown.
    :arg es: The ES to use. Defaults to creating a new indexing ES.

    """
    if es is None:
        es = get_es()

    for ids in chunked(id_list, 200):
        documents = []
        obj_list = cls.get_model().objects.filter(id__in=ids)
        for obj in obj_list:
            try:
                documents.append(cls.extract_document(obj_id=obj.id, obj=obj))
            except Exception:
                log.exception('Unable to extract/index document (id: %d)',
                              obj.id)
                if reraise:
                    raise

        if documents:
            cls.bulk_index(documents, id_field='id', es=es)

    if settings.DEBUG:
        # Nix queries so that this doesn't become a complete
        # memory hog and make Will's computer sad when DEBUG=True.
        reset_queries()
Example #30
    def __init__(self, *args, **kwargs):
        from elasticutils.contrib.django import get_es
        es = get_es()

        es.indices.create(index=ES_INDEXES['default'], ignore=400)
        es.indices.create(index=ES_INDEXES['public'], ignore=400)
        super(TestCase, self).__init__(*args, **kwargs)
Example #31
def index_chunk(cls, chunk, reraise=False, es=None):
    """Index a chunk of documents.

    :arg cls: The MappingType class.
    :arg chunk: Iterable of ids of that MappingType to index.
    :arg reraise: False if you want errors to be swallowed and True
        if you want errors to be thrown.
    :arg es: The ES to use. Defaults to creating a new indexing ES.

    .. Note::

       This indexes all the documents in the chunk in one single bulk
       indexing call. Keep that in mind when you break your indexing
       task into chunks.

    """
    if es is None:
        es = get_es()

    documents = []
    for id_ in chunk:
        try:
            documents.append(cls.extract_document(id_))
        except Exception:
            log.exception('Unable to extract/index document (id: %d)', id_)
            if reraise:
                raise

    cls.bulk_index(documents, id_field='id', es=es)
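A hedged sketch of driving the chunked indexer above during a full reindex, assuming a chunked() helper like the one used elsewhere on this page and a hypothetical ThreadMappingType:

es = get_es()
ids = list(ThreadMappingType.get_model().objects.values_list('id', flat=True))
for chunk in chunked(sorted(ids), 1000):
    # Each call becomes a single bulk_index request, so keep chunks modest.
    index_chunk(ThreadMappingType, chunk, reraise=True, es=es)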
Example #32
def es_analyze(text, analyzer=None):
    """Returns analysis of text.

    :arg text: the text to analyze

    :arg analyzer: (optional) the analyzer to use. Defaults to snowball
        which is an English-settings analyzer.

    :returns: list of dicts each describing a token

    """
    es = get_es()
    index = get_index()

    analyzer = analyzer or 'snowball'

    # pyelasticsearch doesn't support analyze, so we do it "manually"
    # using pyelasticsearch's innards. When we update to
    # elasticsearch-py we should rewrite this.
    ret = es.send_request(
        'GET',
        [index, '_analyze'],
        query_params={'analyzer': analyzer},
        body=text)

    return ret['tokens']
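A short usage sketch for es_analyze() above; the returned dicts follow the standard _analyze response shape ('token', 'start_offset', 'end_offset', 'position'):

tokens = es_analyze(u'The quick brown foxes jumped', analyzer='snowball')
# Stemmed, stopword-free terms, e.g. ['quick', 'brown', 'fox', 'jump'].
print [t['token'] for t in tokens]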
Example #33
    def setup_class(cls):
        super(QueryTest, cls).setup_class()
        if cls.skip_tests:
            return

        try:
            import django
        except ImportError:
            cls.skip_tests = True
            return

        from elasticutils.contrib.django import get_es

        es = get_es()
        try:
            es.delete_index_if_exists(cls.index_name)
        except pyes.exceptions.IndexMissingException:
            # TODO: No clue why this is throwing an IndexMissingException
            # because I thought the whole point of delete_index_if_exists
            # is that it _didn't_ throw an exception if the index was
            # missing.
            pass
        data1 = FakeModel(id=1, foo='bar', tag='awesome', width='2')
        data2 = FakeModel(id=2, foo='barf', tag='boring', width='7')
        data3 = FakeModel(id=3, foo='car', tag='awesome', width='5')
        data4 = FakeModel(id=4, foo='duck', tag='boat', width='11')
        data5 = FakeModel(id=5, foo='train car', tag='awesome', width='7')

        for data in (data1, data2, data3, data4, data5):
            es.index(data.__dict__, cls.index_name, FakeModel._meta.db_table,
                    bulk=True, id=data.id)
        es.refresh()
Example #34
def index_finance_daily_inapp(ids, **kw):
    """
    Similar to index_finance_daily, except for InappPayments.

    ids -- ids of mkt.stats.webapps.InappPayment objects
    """
    es = elasticutils.get_es()

    # Get contributions.
    qs = (InappPayment.objects.filter(id__in=ids).order_by('created').values(
        'name', 'config__addon', 'created'))
    log.info('[%s] Indexing %s in-app payments for daily stats.' %
             (qs[0]['created'], len(ids)))

    # It's defaultdicts all the way down.
    addons_inapps_dates = defaultdict(
        lambda: defaultdict(lambda: defaultdict(int)))
    for payment in qs:
        addon = payment['config__addon']
        inapp = payment['name']
        date = payment['created'].strftime('%Y%m%d')

        # Date for add-on not processed, index it and give it key.
        if date not in addons_inapps_dates[addon][inapp]:
            key = ord_word('fin%s%s%s' % (str(addon), str(inapp), str(date)))
            data = search.get_finance_daily_inapp(payment)
            try:
                if not already_indexed(InappPayment, data):
                    InappPayment.index(data, bulk=True, id=key)
                addons_inapps_dates[addon][inapp][date] = 0
                es.flush_bulk(forced=True)
            except Exception, exc:
                index_finance_daily_inapp.retry(args=[ids], exc=exc)
                raise
Example #35
    def unindex(cls, id):
        es = elasticutils.get_es()
        try:
            es.delete(cls._get_index(), cls._meta.db_table, id)
        except pyes.exceptions.NotFoundException:
            # Item wasn't found, whatevs.
            pass
Example #36
def index_collections(ids, **kw):
    es = elasticutils.get_es()
    log.debug('Indexing collections %s-%s [%s].' % (ids[0], ids[-1], len(ids)))
    qs = Collection.uncached.filter(id__in=ids).transform(attach_translations)
    for c in qs:
        Collection.index(search.extract(c), bulk=True, id=c.id)
    es.flush_bulk(forced=True)
Example #37
def remap(index_cls):
    # Get an instance of the elasticsearch python wrapper.
    es = get_es()
    index = index_cls.get_index()
    model = index_cls.get_model()
    type_name = index_cls.get_mapping_type_name()

    # Delete the mapping if it exists.
    if es.indices.exists(index=index):
        try:
            es.indices.delete_mapping(index=index, doc_type=type_name)
        except Exception:
            pass

    # Put the mapping.
    # Comment this out to let elasticsearch generate the mapping.
    result = es.indices.put_mapping(
        index=index,
        doc_type=type_name,
        body={
            type_name: index_cls.get_mapping()
        }
    )

    if not result['acknowledged']:
        print "Mapping was not acknowledged by elasticsearch"
    else:
        print "Mapping acknowledged by elasticsearch"
Example #38
def setup_indexes(index=None, aliased=True):
    es = elasticutils.get_es()
    for model in CollectionCount, DownloadCount, UpdateCount:
        index = index or model._get_index()
        index = create_es_index_if_missing(index, aliased=aliased)

        mapping = {
            'properties': {
                'id': {
                    'type': 'long'
                },
                'count': {
                    'type': 'long'
                },
                'data': {
                    'dynamic': 'true',
                    'properties': {
                        'v': {
                            'type': 'long'
                        },
                        'k': {
                            'type': 'string'
                        }
                    }
                },
                'date': {
                    'format': 'dateOptionalTime',
                    'type': 'date'
                }
            }
        }
        es.put_mapping(model._meta.db_table, mapping, index)
Example #39
def index_finance_total_inapp(addons, **kw):
    """
    Bug 758071
    Aggregates financial stats from all of the contributions for in-apps.
    """
    es = elasticutils.get_es()
    log.info('Indexing total financial in-app stats for %s apps.' %
             len(addons))

    for addon in addons:
        # Get all in-app names for given addon.
        inapps = set(InappPayment.objects.filter(config__addon=addon).
            values_list('name', flat=True))

        for inapp_name in inapps:
            # Get all in-app payments for given in-app.
            qs = InappPayment.objects.filter(name=inapp_name,
                                             contribution__uuid=None)
            if not qs.exists():
                continue

            try:
                key = ord_word('totinapp' + str(addon) + inapp_name)
                data = search.get_finance_total_inapp(qs, addon, inapp_name)
                if not already_indexed(InappPayment, data):
                    InappPayment.index(data, bulk=True, id=key)
                es.flush_bulk(forced=True)
            except Exception, exc:
                index_finance_total_inapp.retry(args=[addons], exc=exc)
                raise
Example #40
def index_finance_total_inapp(addons, **kw):
    """
    Bug 758071
    Aggregates financial stats from all of the contributions for in-apps.
    """
    es = elasticutils.get_es()
    log.info('Indexing total financial in-app stats for %s apps.' %
             len(addons))

    for addon in addons:
        # Get all in-app names for given addon.
        inapps = set(
            InappPayment.objects.filter(config__addon=addon).values_list(
                'name', flat=True))

        for inapp_name in inapps:
            # Get all in-app payments for given in-app.
            qs = InappPayment.objects.filter(name=inapp_name,
                                             contribution__uuid=None)
            if not qs.exists():
                continue

            try:
                key = ord_word('totinapp' + str(addon) + inapp_name)
                data = search.get_finance_total_inapp(qs, addon, inapp_name)
                if not already_indexed(InappPayment, data):
                    InappPayment.index(data, bulk=True, id=key)
                es.flush_bulk(forced=True)
            except Exception, exc:
                index_finance_total_inapp.retry(args=[addons], exc=exc)
                raise
Example #41
    def unindex(cls, id):
        es = elasticutils.get_es()
        try:
            es.delete(cls._get_index(), cls._meta.db_table, id)
        except pyes.exceptions.NotFoundException:
            # Item wasn't found, whatevs.
            pass
Example #42
def index_installed_daily(ids, **kw):
    """
    Takes a list of Installed ids and uses its addon and date fields to index
    stats for that day.
    ids -- ids of mkt.webapps.Installed objects
    """
    from mkt.webapps.models import Installed
    es = elasticutils.get_es()
    # Get Installed's
    qs = (Installed.objects.filter(id__in=set(ids)).
        order_by('-created').values('addon', 'created'))
    log.info('[%s] Indexing %s installed counts for daily stats.' %
             (qs[0]['created'], len(qs)))

    addons_dates = defaultdict(lambda: defaultdict(dict))
    for installed in qs:
        addon = installed['addon']
        date = installed['created'].strftime('%Y%m%d')

        try:
            if date not in addons_dates[addon]:
                key = ord_word('ins' + str(addon) + str(date))
                data = search.get_installed_daily(installed)
                if not already_indexed(Installed, data):
                    Installed.index(data, bulk=True, id=key)
                addons_dates[addon][date] = 0
            es.flush_bulk(forced=True)
        except Exception, exc:
            index_installed_daily.retry(args=[ids], exc=exc)
            raise
Example #43
def index_all_profiles():
    # Get an es object, delete index and re-create it

    index = settings.ES_INDEXES['default']
    es = get_es(timeout=settings.ES_INDEXING_TIMEOUT)
    try:
        es.delete_index_if_exists(index)
    except pyes.exceptions.IndexMissingException:
        pass

    mappings = {
        'mappings': {
            UserProfile._meta.db_table: UserProfile.get_mapping()
        }
    }

    es.create_index(index, settings=mappings)

    ids = (UserProfile.objects.exclude(full_name='').values_list('id',
                                                                 flat=True))
    ts = [
        tasks.index_objects.subtask(args=[UserProfile, chunk])
        for chunk in chunked(sorted(list(ids)), 150)
    ]
    TaskSet(ts).apply_async()
Example #44
def index_finance_total_by_src(addons, **kw):
    """
    Bug 758059
    Total finance stats, source breakdown.
    """
    es = elasticutils.get_es()
    log.info('Indexing total financial stats by source for %s apps.' %
              len(addons))

    for addon in addons:
        # Get all contributions for given add-on.
        qs = Contribution.objects.filter(addon=addon, uuid=None)
        if not qs.exists():
            continue

        # Get list of distinct sources.
        sources = set(qs.values_list('source', flat=True))

        for source in sources:
            try:
                key = ord_word('src' + str(addon) + str(source))
                data = search.get_finance_total(qs, addon, 'source',
                                                source=source)
                if not already_indexed(Contribution, data):
                    Contribution.index(data, bulk=True, id=key)
                es.flush_bulk(forced=True)
            except Exception, exc:
                index_finance_total_by_src.retry(args=[addons], exc=exc)
                raise
Example #45
def index_all_profiles():
    # Get an es object, delete index and re-create it
    es = get_es(timeout=settings.ES_INDEXING_TIMEOUT)
    mappings = {'mappings':
                {UserProfile._meta.db_table: UserProfile.get_mapping()}}

    def _recreate_index(index):
        try:
            es.delete_index_if_exists(index)
        except pyes.exceptions.IndexMissingException:
            pass
        es.create_index(index, settings=mappings)
    _recreate_index(settings.ES_INDEXES['default'])
    _recreate_index(settings.ES_INDEXES['public'])

    # mozillians index
    ids = UserProfile.objects.complete().values_list('id', flat=True)
    ts = [index_objects.subtask(args=[UserProfile, chunk, False])
          for chunk in chunked(sorted(list(ids)), 150)]

    # public index
    ids = (UserProfile.objects.complete().public_indexable()
           .privacy_level(PUBLIC).values_list('id', flat=True))
    ts += [index_objects.subtask(args=[UserProfile, chunk, True])
           for chunk in chunked(sorted(list(ids)), 150)]

    TaskSet(ts).apply_async()
Example #46
def index_finance_total_by_currency(addons, **kw):
    """
    Bug 757581
    Total finance stats, currency breakdown.
    """
    es = elasticutils.get_es()
    log.info('Indexing total financial stats by currency for %s apps.' %
              len(addons))

    for addon in addons:
        # Get all contributions for given add-on.
        qs = Contribution.objects.filter(addon=addon, uuid=None)
        if not qs.exists():
            continue

        # Get list of distinct currencies.
        currencies = set(qs.values_list('currency', flat=True))

        for currency in currencies:
            try:
                key = ord_word('cur' + str(addon) + currency.lower())
                data = search.get_finance_total(
                    qs, addon, 'currency', currency=currency)
                if not already_indexed(Contribution, data):
                    Contribution.index(data, bulk=True, id=key)
                es.flush_bulk(forced=True)
            except Exception, exc:
                index_finance_total_by_currency.retry(args=[addons], exc=exc)
                raise
Example #47
def recreate_indexes(es=None, indexes=None):
    """Deletes indexes and recreates them.

    :arg es: An ES object to use. Defaults to calling `get_es()`.
    :arg indexes: A list of indexes to recreate. Defaults to all write
        indexes.
    """
    if es is None:
        es = get_es()
    if indexes is None:
        indexes = all_write_indexes()

    for index in indexes:
        delete_index(index)

        # There should be no mapping-conflict race here since the index doesn't
        # exist. Live indexing should just fail.

        # Simultaneously create the index, the mappings, the analyzers, and
        # the tokenizers, so live indexing doesn't get a chance to index
        # anything between and infer a bogus mapping (which ES then freaks
        # out over when we try to lay in an incompatible explicit mapping).
        es.indices.create(index=index,
                          body={
                              'mappings': get_mappings(index),
                              'settings': {
                                  'analysis': get_analysis(),
                              }
                          })

    # Wait until the index is there.
    es.cluster.health(wait_for_status='yellow')
Example #48
    def test_remove_index(self):
        # Putting a test_amo index in the way.
        es = elasticutils.get_es()

        for index in es.get_indices().keys():
            for prefix in ('test_amo', 'test_amo_stats'):
                if index.startswith(prefix + '-'):
                    es.delete_alias(prefix, [index])
                    es.delete_index(index)
                    es.create_index(prefix)

        # reindexing the first app
        self.webapp.save()
        self.refresh()

        # now doing a reindexation in a background process
        args = [sys.executable, 'manage.py', 'reindex', '--prefix=test_',
                '--settings=%s' % self.settings]

        indexer = subprocess.Popen(args,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   cwd=settings.ROOT)
        stdout, stderr = indexer.communicate()
        self.assertTrue('Reindexation done' in stdout, stdout + '\n' + stderr)
Example #49
def recreate_indexes(es=None, indexes=None):
    """Deletes indexes and recreates them.

    :arg es: An ES object to use. Defaults to calling `get_es()`.
    :arg indexes: A list of indexes to recreate. Defaults to all write
        indexes.
    """
    if es is None:
        es = get_es()
    if indexes is None:
        indexes = all_write_indexes()

    for index in indexes:
        delete_index(index)

        # There should be no mapping-conflict race here since the index doesn't
        # exist. Live indexing should just fail.

        # Simultaneously create the index, the mappings, the analyzers, and
        # the tokenizers, so live indexing doesn't get a chance to index
        # anything between and infer a bogus mapping (which ES then freaks
        # out over when we try to lay in an incompatible explicit mapping).
        es.indices.create(index=index, body={
            'mappings': get_mappings(index),
            'settings': {
                'analysis': get_analysis(),
            }
        })

    # Wait until the index is there.
    es.cluster.health(wait_for_status='yellow')
Example #50
def unindex_objects(mapping_type, ids, public_index, **kwargs):
    if getattr(settings, 'ES_DISABLED', False):
        return

    es = get_es()
    for id_ in ids:
        mapping_type.unindex(id_, es=es, public_index=public_index)
Example #51
def unindex_objects(mapping_type, ids, public_index, **kwargs):
    if getattr(settings, 'ES_DISABLED', False):
        return

    es = get_es()
    for id_ in ids:
        mapping_type.unindex(id_, es=es, public_index=public_index)
Example #52
def recreate_index(es=None):
    """Delete index if it's there and creates a new one.

    :arg es: ES to use. By default, this creates a new indexing ES.

    """
    if es is None:
        es = get_es()

    mappings = {}
    for name, mt in get_mapping_types().items():
        mapping = mt.get_mapping()
        if mapping is not None:
            mappings[name] = {'properties': mapping}

    index = get_index()

    delete_index_if_exists(index)

    # There should be no mapping-conflict race here since the index
    # doesn't exist. Live indexing should just fail.

    # Simultaneously create the index and the mappings, so live
    # indexing doesn't get a chance to index anything between the two
    # causing ES to infer a possibly bogus mapping (which causes ES to
    # freak out if the inferred mapping is incompatible with the
    # explicit mapping).

    es.create_index(index, settings={'mappings': mappings})
Example #53
    def test_get_es_defaults(self):
        """Test that the ES has the correct defaults."""
        es = get_es()
        eq_(es.timeout, settings.ES_TIMEOUT)
        # dump_curl defaults to False, but if dump_curl is Falsey,
        # then pyes.es.ES sets its dump_curl attribute to None.
        eq_(es.dump_curl, None)
        eq_(es.default_indexes, [settings.ES_INDEXES['default']])
Example #54
    def get_es(cls):
        """Returns an ElasticSearch object

        Override this if you need special functionality.
        :returns: a pyelasticsearch `ElasticSearch` instance

        """
        return get_es()
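A hedged sketch of overriding the hook above, for instance to build the client with the longer indexing timeout other examples on this page use; the subclass and its base class name are assumptions:

class SearchMappingType(MappingType):  # base class name assumed
    @classmethod
    def get_es(cls):
        # Indexing-heavy work gets a more generous timeout.
        return get_es(timeout=settings.ES_INDEXING_TIMEOUT)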
Example #55
def user(**kwargs):
    profile_changes = {}
    if 'username' not in kwargs:
        kwargs['username'] = ''.join(
            random.choice(letters) for x in xrange(15))
    if 'email' not in kwargs:
        kwargs['email'] = ''.join(random.choice(letters)
                                  for x in xrange(15)) + '@example.com'
    if 'first_name' not in kwargs:
        kwargs['first_name'] = ''.join(
            random.choice(letters) for x in xrange(15))
    if 'last_name' not in kwargs:
        kwargs['last_name'] = ''.join(
            random.choice(letters) for x in xrange(15))

    if 'vouched' in kwargs:
        profile_changes['vouched'] = kwargs['vouched']
        del kwargs['vouched']
    if 'photo' in kwargs:
        profile_changes['photo'] = kwargs['photo']
        del kwargs['photo']
    user = User.objects.create(**kwargs)
    user.save()

    if profile_changes:
        profile = user.get_profile()

        if 'vouched' in profile_changes:
            if profile_changes['vouched']:
                profile.is_vouched = True

        if 'photo' in profile_changes:
            if profile_changes['photo']:
                with open(
                        os.path.join(os.path.dirname(__file__),
                                     'profile-photo.jpg')) as f:
                    profile.photo = File(f)
                    profile.save()  # Must save inside with block

        profile.save()

    if not settings.ES_DISABLED:
        get_es().refresh(settings.ES_INDEXES['default'], timesleep=0)

    return user
Example #56
    def index(cls,
              document,
              id_=None,
              bulk=False,
              force_insert=False,
              es=None):
        """Adds or updates a document to the index

        :arg document: Python dict of key/value pairs representing
            the document

            .. Note::

               This must be serializable into JSON.

        :arg id_: the Django ORM model instance id---this is used to
            convert an ES search result back to the Django ORM model
            instance from the db. It should be an integer.

            .. Note::

               If you don't provide an ``id_``, then ElasticSearch
               will make up an id for your document and it'll look like
               a character name from a Lovecraft novel.

        :arg bulk: Whether or not this is part of a bulk indexing.  If
            this is, you must provide an ES with the `es` argument,
            too.
        :arg force_insert: TODO
        :arg es: The ES to use. If you don't specify an ES, it'll
            use `elasticutils.contrib.django.get_es()`.

        :raises ValueError: if `bulk` is True, but `es` is None.

        .. Note::

           After you add things to the index, make sure to refresh the
           index by calling ``refresh_index()``---it doesn't happen
           automatically.


        TODO: add example.

        """
        if bulk and es is None:
            raise ValueError('bulk is True, but es is None')

        if es is None:
            es = get_es()

        es.index(document,
                 index=cls.get_index(),
                 doc_type=cls.get_mapping_type_name(),
                 id=id_,
                 bulk=bulk,
                 force_insert=force_insert)
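The docstring above also ends with "TODO: add example". A possible sketch, with QuestionMappingType and refresh_index() as hypothetical stand-ins for a concrete MappingType and the index-refresh helper the closing note refers to:

QuestionMappingType.index({'id': 7, 'title': 'How do I flash my phone?'}, id_=7)

# Nothing is searchable until the index is refreshed.
refresh_index()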