Ejemplo n.º 1
0
def create_index(index, config=None):
    """Create an index if it's not present.

    Return the index name.

    Options:

    - index: name of the index.
    - config: if provided, used when passing the configuration of the index to
      ES. The dict passed in is never modified: the default shards and
      replicas settings are applied to a private copy.
    """
    es = amo_search.get_es()

    # Shallow-copy the top-level dict so that replacing its 'settings' key
    # below does not leak into the dict owned by the caller.
    config = dict(config) if config is not None else {}

    # Deep-copy the settings that were passed, so that we can modify them
    # freely to add shards and replicas settings.
    index_settings = deepcopy(config.get('settings') or {})
    # 'settings' may legitimately come without an 'index' section.
    index_settings.setdefault('index', {}).update({
        'number_of_shards': settings.ES_DEFAULT_NUM_SHARDS,
        'number_of_replicas': settings.ES_DEFAULT_NUM_REPLICAS
    })
    config['settings'] = index_settings

    if not es.indices.exists(index):
        es.indices.create(index, body=config)

    return index
Ejemplo n.º 2
0
def index_collection_counts(ids, index=None, **kw):
    """Bulk-index the collection counts of the given collections.

    - ids: collection ids whose ``CollectionCount`` rows get indexed.
    - index: target index; falls back to ``search.get_alias()``.

    On any failure the task is retried with the same ids and index, and the
    original exception is re-raised.
    """
    index = index or search.get_alias()

    es = amo_search.get_es()
    qs = CollectionCount.objects.filter(collection__in=ids)

    if qs.exists():
        log.info('Indexing %s addon collection counts: %s'
                 % (qs.count(), qs[0].date))

    data = []
    try:
        for collection_count in qs:
            collection = collection_count.collection_id
            filters = dict(collection=collection,
                           date=collection_count.date)
            data.append(search.extract_addon_collection(
                collection_count,
                AddonCollectionCount.objects.filter(**filters),
                CollectionStats.objects.filter(**filters)))
        bulk_index(es, data, index=index,
                   doc_type=CollectionCount.get_mapping_type(),
                   refresh=True)
    except Exception as exc:
        # Pass the resolved index along, otherwise the retried task would
        # silently fall back to the default alias.
        index_collection_counts.retry(args=[ids, index], exc=exc)
        raise
Ejemplo n.º 3
0
def index_objects(ids, model, extract_func, index=None, transforms=None,
                  objects=None):
    """Bulk-index the ``model`` instances matching ``ids``.

    Each object is turned into a document with ``extract_func`` and queued
    once per index returned by ``Reindexing.objects.get_indices(index)``.
    Returns whatever ``helpers.bulk`` returns.
    """
    if index is None:
        index = model._get_index()
    if objects is None:
        objects = model.objects

    target_indices = Reindexing.objects.get_indices(index)
    transforms = [] if transforms is None else transforms

    queryset = objects.no_cache().filter(id__in=ids)
    for transform in transforms:
        queryset = queryset.transform(transform)

    actions = []
    for obj in queryset:
        # Extract once per object; the same source is shared by all indices.
        source = extract_func(obj)
        actions.extend(
            {
                "_source": source,
                "_id": obj.id,
                "_type": obj.get_mapping_type(),
                "_index": target,
            }
            for target in target_indices)

    return helpers.bulk(amo_search.get_es(), actions)
Ejemplo n.º 4
0
def create_index(index, config=None):
    """Create an index if it's not present.

    Return the index name.

    Options:

    - index: name of the index.
    - config: if provided, used as the settings option for the
      ES calls. The dict passed in is never modified.
    """
    es = amo_search.get_es()

    if settings.IN_TEST_SUITE:
        # Copy before updating so the caller's dict is left untouched.
        config = dict(config) if config else {}
        # Be nice to ES running on ci.mozilla.org
        config.update({
            'number_of_shards': 3,
            'number_of_replicas': 0
        })

    if not es.indices.exists(index):
        es.indices.create(index, body=config, ignore=400)

    return index
Ejemplo n.º 5
0
 def setUpClass(cls):
     """Attach an ES client to the class and swap the search analyzer map.

     The previous ``amo.SEARCH_ANALYZER_MAP`` is kept on the class as
     ``_SEARCH_ANALYZER_MAP`` before being replaced.
     """
     test_analyzer_map = dict(english=['en-us'], spanish=['es'])
     cls.es = amo_search.get_es(timeout=settings.ES_TIMEOUT)
     # The right-hand side is evaluated first, so the old map is saved
     # before the replacement is installed.
     cls._SEARCH_ANALYZER_MAP, amo.SEARCH_ANALYZER_MAP = (
         amo.SEARCH_ANALYZER_MAP, test_analyzer_map)
     super(ESTestCase, cls).setUpClass()
Ejemplo n.º 6
0
    def setUpClass(cls):
        """Attach an ES client to the class and check the cluster answers.

        If the health check fails, the exception is re-raised with a hint
        about ES not running appended to its first argument.
        """
        cls.es = amo_search.get_es(timeout=settings.ES_TIMEOUT)

        super(ESTestCase, cls).setUpClass()
        try:
            cls.es.cluster.health()
        except Exception as e:
            # Exceptions may carry no args at all; don't let an IndexError
            # hide the real failure.
            first_arg = e.args[0] if e.args else u''
            e.args = (
                u"%s (it looks like ES is not running, try starting it or "
                u"don't run ES tests: make test_no_es)" % (first_arg,),
            ) + tuple(e.args[1:])
            raise
Ejemplo n.º 7
0
def elastic():
    """Report the Elasticsearch cluster health.

    Returns a ``(status, results)`` tuple. ``status`` is ``'ES is red'`` when
    the cluster reports a red status and an empty string otherwise.
    ``results`` is the health response, or a dict holding the formatted
    traceback under ``'exception'`` when anything went wrong.
    """
    try:
        health = search.get_es().cluster.health()
        message = 'ES is red' if health['status'] == 'red' else ''
    except Exception:
        return '', {'exception': traceback.format_exc()}
    return message, health
Ejemplo n.º 8
0
def index_update_counts(ids, index=None, **kw):
    """Bulk-index the ``UpdateCount`` rows matching ``ids``.

    - ids: primary keys of the rows to index.
    - index: target index; falls back to ``search.get_alias()``.

    On any failure the task is retried with the same ids and index, and the
    original exception is re-raised.
    """
    index = index or search.get_alias()

    es = amo_search.get_es()
    qs = UpdateCount.objects.filter(id__in=ids)
    if qs:
        log.info('Indexing %s updates for %s.' % (qs.count(), qs[0].date))
    data = []
    try:
        for update in qs:
            data.append(search.extract_update_count(update))
        bulk_index(es, data, index=index,
                   doc_type=UpdateCount.get_mapping_type(), refresh=True)
    except Exception as exc:
        index_update_counts.retry(args=[ids, index], exc=exc, **kw)
        raise
Ejemplo n.º 9
0
def index_download_counts(ids, index=None, **kw):
    """Bulk-index the ``DownloadCount`` rows matching ``ids``.

    - ids: primary keys of the rows to index.
    - index: target index; falls back to ``search.get_alias()``.

    On any failure the task is retried with the same ids and index, and the
    original exception is re-raised.
    """
    index = index or search.get_alias()

    es = amo_search.get_es()
    qs = DownloadCount.objects.filter(id__in=ids)
    if qs:
        log.info('Indexing %s downloads for %s.' % (qs.count(), qs[0].date))
    try:
        data = []
        for dl in qs:
            data.append(search.extract_download_count(dl))
        bulk_index(es, data, index=index,
                   doc_type=DownloadCount.get_mapping_type(), refresh=True)
    except Exception as exc:
        index_download_counts.retry(args=[ids, index], exc=exc)
        raise
Ejemplo n.º 10
0
def elastic(request):
    """Render the admin page showing Elasticsearch nodes, health and state.

    The context also lists every configured index name together with its
    known mappings (an empty dict when no mappings are registered for it).
    """
    default_index = settings.ES_INDEXES['default']
    client = search.get_es()

    index_names = set(settings.ES_INDEXES.values())
    known_mappings = {}
    known_mappings['addons'] = get_addons_mappings()
    known_mappings['addons_stats'] = get_stats_mappings()

    context = {'index': default_index}
    context['nodes'] = client.nodes.stats()
    context['health'] = client.cluster.health()
    context['state'] = client.cluster.state()
    context['mappings'] = [
        (name, known_mappings.get(name, {})) for name in index_names
    ]
    return render(request, 'zadmin/elastic.html', context)
Ejemplo n.º 11
0
def index_update_counts(ids, index=None, **kw):
    """Bulk-index the ``UpdateCount`` rows matching ``ids``.

    The target index defaults to ``UpdateCountIndexer.get_index_alias()``.
    On any failure the task is retried with the same ids and index, and the
    original exception is re-raised.
    """
    if not index:
        index = UpdateCountIndexer.get_index_alias()

    es = amo_search.get_es()
    update_counts = UpdateCount.objects.filter(id__in=ids)
    if update_counts.exists():
        log.info('Indexing %s updates for %s.' % (
            update_counts.count(), update_counts[0].date))
    try:
        documents = [
            UpdateCountIndexer.extract_document(row) for row in update_counts
        ]
        bulk_index(
            es,
            documents,
            index=index,
            doc_type=UpdateCountIndexer.get_doctype_name(),
            refresh=True)
    except Exception as exc:
        index_update_counts.retry(args=[ids, index], exc=exc, **kw)
        raise
Ejemplo n.º 12
0
def elastic(request):
    """Render the admin page showing Elasticsearch nodes, health and state.

    The context also lists every configured index name together with its
    known mappings (an empty dict when no mappings are registered for it).
    """
    default_index = settings.ES_INDEXES['default']
    client = search.get_es()

    index_names = set(settings.ES_INDEXES.values())
    known_mappings = {}
    known_mappings['addons'] = get_addons_mappings()
    known_mappings['addons_stats'] = get_stats_mappings()

    context = {'index': default_index}
    context['nodes'] = client.nodes.stats()
    context['health'] = client.cluster.health()
    context['state'] = client.cluster.state()
    context['mappings'] = [
        (name, known_mappings.get(name, {})) for name in index_names
    ]
    return render(request, 'zadmin/elastic.html', context)
Ejemplo n.º 13
0
def index_theme_user_counts(ids, index=None, **kw):
    """Bulk-index the ``ThemeUserCount`` rows matching ``ids``.

    - ids: primary keys of the rows to index.
    - index: target index; falls back to ``search.get_alias()``.

    On any failure the task is retried with the same ids and index, and the
    original exception is re-raised.
    """
    index = index or search.get_alias()

    es = amo_search.get_es()
    qs = ThemeUserCount.objects.filter(id__in=ids)

    if qs:
        log.info('Indexing %s theme user counts for %s.'
                 % (qs.count(), qs[0].date))
    data = []
    try:
        for user_count in qs:
            data.append(search.extract_theme_user_count(user_count))
        bulk_index(es, data, index=index,
                   doc_type=ThemeUserCount.get_mapping_type(), refresh=True)
    except Exception as exc:
        # Pass the resolved index along, otherwise the retried task would
        # silently fall back to the default alias.
        index_theme_user_counts.retry(args=[ids, index], exc=exc, **kw)
        raise
Ejemplo n.º 14
0
def index_theme_user_counts(ids, index=None, **kw):
    """Bulk-index the ``ThemeUserCount`` rows matching ``ids``.

    - ids: primary keys of the rows to index.
    - index: target index; falls back to ``search.get_alias()``.

    On any failure the task is retried with the same ids and index, and the
    original exception is re-raised.
    """
    index = index or search.get_alias()

    es = amo_search.get_es()
    qs = ThemeUserCount.objects.filter(id__in=ids)

    if qs.exists():
        log.info('Indexing %s theme user counts for %s.'
                 % (qs.count(), qs[0].date))
    data = []

    try:
        for user_count in qs:
            data.append(search.extract_theme_user_count(user_count))
        bulk_index(es, data, index=index,
                   doc_type=ThemeUserCount.get_mapping_type(), refresh=True)
    except Exception as exc:
        # Pass the resolved index along, otherwise the retried task would
        # silently fall back to the default alias.
        index_theme_user_counts.retry(args=[ids, index], exc=exc, **kw)
        raise
Ejemplo n.º 15
0
def index_download_counts(ids, index=None, **kw):
    """Bulk-index the ``DownloadCount`` rows matching ``ids``.

    - ids: primary keys of the rows to index.
    - index: target index; falls back to ``search.get_alias()``.

    On any failure the task is retried with the same ids and index, and the
    original exception is re-raised.
    """
    index = index or search.get_alias()

    es = amo_search.get_es()
    qs = DownloadCount.objects.filter(id__in=ids)
    if qs:
        log.info('Indexing %s downloads for %s.' % (qs.count(), qs[0].date))
    try:
        data = []
        for dl in qs:
            data.append(search.extract_download_count(dl))
        bulk_index(es,
                   data,
                   index=index,
                   doc_type=DownloadCount.get_mapping_type(),
                   refresh=True)
    except Exception as exc:
        index_download_counts.retry(args=[ids, index], exc=exc)
        raise
Ejemplo n.º 16
0
def create_index(index, config=None):
    """Create an index if it's not present.

    Return the index name.

    Options:

    - index: name of the index.
    - config: if provided, used as the settings option for the
      ES calls. The dict passed in is never modified.
    """
    es = amo_search.get_es()

    if settings.IN_TEST_SUITE:
        # Copy before updating so the caller's dict is left untouched.
        config = dict(config) if config else {}
        # Be nice to ES running on ci.mozilla.org
        config.update({'number_of_shards': 3, 'number_of_replicas': 0})

    if not es.indices.exists(index):
        es.indices.create(index, body=config, ignore=400)

    return index
Ejemplo n.º 17
0
def index_objects(ids,
                  model,
                  extract_func,
                  index=None,
                  transforms=None,
                  objects=None):
    """Bulk-index the ``model`` instances matching ``ids``.

    Each object is turned into a document with ``extract_func`` and queued
    once per index returned by ``Reindexing.objects.get_indices(index)``.
    ``no_cache()`` is used on the manager only when it provides it.
    Returns whatever ``helpers.bulk`` returns.
    """
    if index is None:
        index = model._get_index()
    if objects is None:
        objects = model.objects

    target_indices = Reindexing.objects.get_indices(index)
    transforms = [] if transforms is None else transforms

    uncached = (
        objects.no_cache() if hasattr(objects, 'no_cache') else objects)
    queryset = uncached.filter(id__in=ids)
    for transform in transforms:
        queryset = queryset.transform(transform)

    actions = []
    for obj in queryset:
        # Extract once per object; the same source is shared by all indices.
        source = extract_func(obj)
        actions.extend(
            {
                "_source": source,
                "_id": obj.id,
                "_type": obj.get_mapping_type(),
                "_index": target,
            }
            for target in target_indices)

    return helpers.bulk(amo_search.get_es(), actions)
Ejemplo n.º 18
0
def create_index(index, config=None):
    """Create an index if it's not present.

    Return the index name.

    Options:

    - index: name of the index.
    - config: if provided, used when passing the configuration of the index to
      ES. The dict passed in is never modified: the default shards, replicas
      and result window settings are applied to a private copy.
    """
    es = amo_search.get_es()

    # Shallow-copy the top-level dict so that replacing its 'settings' key
    # below does not leak into the dict owned by the caller.
    config = dict(config) if config is not None else {}

    # Deep-copy the settings that were passed, so that we can modify them
    # freely to add shards and replicas settings.
    index_settings = deepcopy(config.get('settings') or {})
    # 'settings' may legitimately come without an 'index' section.
    index_settings.setdefault('index', {}).update({
        'number_of_shards': settings.ES_DEFAULT_NUM_SHARDS,
        'number_of_replicas': settings.ES_DEFAULT_NUM_REPLICAS,
        'max_result_window': settings.ES_MAX_RESULT_WINDOW,
    })
    config['settings'] = index_settings

    if not es.indices.exists(index):
        es.indices.create(index, body=config)

    return index
Ejemplo n.º 19
0
def index_collection_counts(ids, index=None, **kw):
    """Bulk-index the collection counts of the given collections.

    - ids: collection ids whose ``CollectionCount`` rows get indexed.
    - index: target index; falls back to ``search.get_alias()``.

    On any failure the task is retried with the same ids and index, and the
    original exception is re-raised.
    """
    index = index or search.get_alias()

    es = amo_search.get_es()
    qs = CollectionCount.objects.filter(collection__in=ids)
    if qs:
        log.info('Indexing %s addon collection counts: %s'
                 % (qs.count(), qs[0].date))
    data = []
    try:
        for collection_count in qs:
            collection = collection_count.collection_id
            filters = dict(collection=collection,
                           date=collection_count.date)
            data.append(search.extract_addon_collection(
                collection_count,
                AddonCollectionCount.objects.filter(**filters),
                CollectionStats.objects.filter(**filters)))
        bulk_index(es, data, index=index,
                   doc_type=CollectionCount.get_mapping_type(),
                   refresh=True)
    except Exception as exc:
        # Pass the resolved index along, otherwise the retried task would
        # silently fall back to the default alias.
        index_collection_counts.retry(args=[ids, index], exc=exc)
        raise
Ejemplo n.º 20
0
from celery.task import control
from celery_tasktree import task_with_callbacks, TaskTree

from django.conf import settings
from django.core.management.base import BaseCommand, CommandError

from olympia.amo.search import get_es
from olympia.addons import search as addons_search
from olympia.stats import search as stats_search
from olympia.lib.es.utils import is_reindexing_amo, unflag_reindexing_amo, flag_reindexing_amo, timestamp_index

# Logger registered under the "z.elasticsearch" name.
logger = logging.getLogger("z.elasticsearch")
# Celery time limits configured in settings for the reindex command.
time_limits = settings.CELERY_TIME_LIMITS["olympia.lib.es.management.commands.reindex"]


# Elasticsearch client created once, when this module is imported.
ES = get_es()


# Maps a short key to the search module imported for it above.
MODULES = {"stats": stats_search, "addons": addons_search}


def log(msg, stdout=sys.stdout):
    """Write ``msg`` followed by a newline to ``stdout``."""
    line = msg + "\n"
    stdout.write(line)


@task_with_callbacks
def delete_indexes(indexes, stdout=sys.stdout):
    """Delete the given indices from ES in a single call.

    The names are joined with commas; 404 and 500 responses are ignored.
    """
    joined_names = ",".join(indexes)
    message = "Removing indices %r" % joined_names
    log(message, stdout=stdout)
    ES.indices.delete(joined_names, ignore=[404, 500])
Ejemplo n.º 21
0
 def setUpClass(cls):
     """Stop the ES mocks and attach an ES client to the class.

     The client is created after the mocks are stopped and before the parent
     ``setUpClass`` runs.
     """
     # Stop the mock temporarily, the pytest fixture will start them
     # right before each test.
     stop_es_mocks()
     cls.es = amo_search.get_es(timeout=settings.ES_TIMEOUT)
     super(ESTestCase, cls).setUpClass()
Ejemplo n.º 22
0
 def get_bayesian_rating(self):
     """Return the ``bayesian_rating`` of the first ES hit for the add-on.

     The add-on index alias is filtered with a term query on the primary key
     of ``self.addon``.
     """
     addon_search = Search(
         using=get_es(), index=AddonIndexer.get_index_alias())
     by_addon_id = Q('term', id=self.addon.pk)
     first_hit = addon_search.filter(by_addon_id).execute()[0]
     return first_hit['bayesian_rating']
Ejemplo n.º 23
0
 def setUpClass(cls):
     """Stop the ES mocks and attach an ES client to the class.

     The client is created after the mocks are stopped and before the parent
     ``setUpClass`` runs.
     """
     # Stop the mock temporarily, the pytest fixture will start them
     # right before each test.
     stop_es_mocks()
     cls.es = amo_search.get_es()
     super().setUpClass()
Ejemplo n.º 24
0
def compatibility_report(index=None):
    """Build the add-on compatibility documents and send them to ES.

    For every app in ``amo.APP_USAGE`` this reads the ``UpdateCount`` rows of
    the most recent date, tallies ``CompatReport`` successes and failures per
    app version, stores the usage total in ``CompatTotals`` and flags the
    add-ons making up the top 95% of usage. Each resulting document is then
    indexed through ``AppCompat.index`` in every index returned by
    ``get_indices(index)``, and the ES indices are refreshed at the end.
    """
    # One document per add-on id, filled in progressively across apps.
    docs = defaultdict(dict)
    indices = get_indices(index)

    # Gather all the data for the index.
    for app in amo.APP_USAGE:
        versions = [c for c in amo.COMPAT if c['app'] == app.id]

        log.info(u'Making compat report for %s.' % app.pretty)
        latest = UpdateCount.objects.aggregate(d=Max('date'))['d']
        qs = UpdateCount.objects.filter(addon__appsupport__app=app.id,
                                        addon__disabled_by_user=False,
                                        addon__status__in=amo.VALID_STATUSES,
                                        addon___current_version__isnull=False,
                                        date=latest)

        # Maps add-on id -> update count for the latest date.
        updates = dict(qs.values_list('addon', 'count'))
        for chunk in chunked(updates.items(), 50):
            chunk = dict(chunk)
            for addon in Addon.objects.filter(id__in=chunk):
                current_version = {
                    'id': addon.current_version.pk,
                    'version': addon.current_version.version,
                }
                doc = docs[addon.id]
                doc.update(id=addon.id,
                           slug=addon.slug,
                           guid=addon.guid,
                           binary=addon.binary_components,
                           name=unicode(addon.name),
                           created=addon.created,
                           current_version=current_version)
                doc['count'] = chunk[addon.id]
                doc.setdefault('top_95',
                               defaultdict(lambda: defaultdict(dict)))
                doc.setdefault('top_95_all', {})
                doc.setdefault('usage', {})[app.id] = updates[addon.id]
                doc.setdefault('works', {}).setdefault(app.id, {})

                # Populate with default counts for all app versions.
                for ver in versions:
                    doc['works'][app.id][vint(ver['main'])] = {
                        'success': 0,
                        'failure': 0,
                        'total': 0,
                        'failure_ratio': 0.0,
                    }

                # Group reports by `major`.`minor` app version.
                reports = (CompatReport.objects.filter(
                    guid=addon.guid, app_guid=app.guid).values_list(
                        'app_version', 'works_properly').annotate(Count('id')))
                for ver, works_properly, cnt in reports:
                    ver = vint(floor_version(ver))
                    major = [
                        v['main'] for v in versions
                        if vint(v['previous']) < ver <= vint(v['main'])
                    ]
                    if major:
                        w = doc['works'][app.id][vint(major[0])]
                        # Tally number of success and failure reports.
                        w['success' if works_properly else 'failure'] += cnt
                        w['total'] += cnt
                        # Calculate % of incompatibility reports.
                        w['failure_ratio'] = w['failure'] / float(w['total'])

                # Without compat info for this app there is nothing to record
                # under 'support' / 'max_version'.
                if app not in addon.compatible_apps:
                    continue
                compat = addon.compatible_apps[app]
                d = {
                    'min': compat.min.version_int,
                    'max': compat.max.version_int
                }
                doc.setdefault('support', {})[app.id] = d
                doc.setdefault('max_version', {})[app.id] = compat.max.version

        total = sum(updates.values())
        # Remember the total so we can show % of usage later.
        compat_total, created = CompatTotals.objects.safer_get_or_create(
            app=app.id, defaults={'total': total})
        if not created:
            compat_total.update(total=total)

        # Figure out which add-ons are in the top 95% for this app.
        running_total = 0
        for addon, count in sorted(updates.items(),
                                   key=lambda x: x[1],
                                   reverse=True):
            running_total += count
            docs[addon]['top_95_all'][app.id] = running_total < (.95 * total)

    # Mark the top 95% of add-ons compatible with the previous version for each
    # app + version combo.
    for compat in amo.COMPAT:
        app, ver = compat['app'], vint(compat['previous'])
        # Find all the docs that have a max_version compatible with ver.
        supported = [
            compat_doc for compat_doc in docs.values()
            if (app in compat_doc.get('support', {})
                and compat_doc['support'][app]['max'] >= ver)
        ]
        # Sort by count so we can get the top 95% most-used add-ons.
        supported = sorted(supported, key=lambda d: d['count'], reverse=True)
        total = sum(doc['count'] for doc in supported)
        # Figure out which add-ons are in the top 95% for this app + version.
        running_total = 0
        for doc in supported:
            running_total += doc['count']
            doc['top_95'][app][ver] = running_total < (.95 * total)

    # Send it all to the index.
    for chunk in chunked(docs.values(), 150):
        for doc in chunk:
            for index in indices:
                AppCompat.index(doc, id=doc['id'], refresh=False, index=index)
    # Documents were indexed with refresh=False; refresh once at the end.
    es = amo_search.get_es()
    es.indices.refresh()
Ejemplo n.º 25
0
from elasticsearch.exceptions import NotFoundError

import olympia.core.logger

from olympia.addons.indexers import AddonIndexer
from olympia.amo.celery import task
from olympia.amo.search import get_es
from olympia.lib.es.utils import (
    flag_reindexing_amo, is_reindexing_amo, timestamp_index,
    unflag_reindexing_amo)
from olympia.stats.indexers import DownloadCountIndexer, UpdateCountIndexer


# Logger registered under the 'z.elasticsearch' name.
logger = olympia.core.logger.getLogger('z.elasticsearch')
# Elasticsearch client created once, when this module is imported.
ES = get_es()


def get_indexer(alias):
    """Return indexer python module for a given alias.

    This needs to be dynamic to work with testing correctly, since tests change
    the value of settings.ES_INDEXES to hit test-specific aliases.

    Raises KeyError if no indexer is registered for ``alias``.
    """
    modules = {
        # The keys are the index alias names, the values the indexer classes.
        # The 'default' in ES_INDEXES is actually named 'addons'
        settings.ES_INDEXES['default']: AddonIndexer,
        settings.ES_INDEXES['stats_download_counts']: DownloadCountIndexer,
        settings.ES_INDEXES['stats_update_counts']: UpdateCountIndexer,
    }
    # Without this lookup the function fell off the end and always returned
    # None, despite what the docstring promises.
    return modules[alias]
Ejemplo n.º 26
0
 def test_get_es_mocked(self):
     """The class of the ``search.get_es()`` client must derive from Mock."""
     es = search.get_es()
     assert issubclass(es.__class__, mock.Mock)
Ejemplo n.º 27
0
def compatibility_report(index=None):
    """Build the add-on compatibility documents and send them to ES.

    For every app in ``amo.APP_USAGE`` this reads the ``UpdateCount`` rows of
    the most recent date, tallies ``CompatReport`` successes and failures per
    app version, stores the usage total in ``CompatTotals`` and flags the
    add-ons making up the top 95% of usage. Each resulting document is then
    indexed through ``AppCompat.index`` in every index returned by
    ``get_indices(index)``, and the ES indices are refreshed at the end.
    """
    # One document per add-on id, filled in progressively across apps.
    docs = defaultdict(dict)
    indices = get_indices(index)

    # Gather all the data for the index.
    for app in amo.APP_USAGE:
        versions = [c for c in amo.COMPAT if c['app'] == app.id]

        log.info(u'Making compat report for %s.' % app.pretty)
        latest = UpdateCount.objects.aggregate(d=Max('date'))['d']
        qs = UpdateCount.objects.filter(addon__appsupport__app=app.id,
                                        addon__disabled_by_user=False,
                                        addon__status__in=amo.VALID_STATUSES,
                                        addon___current_version__isnull=False,
                                        date=latest)

        # Maps add-on id -> update count for the latest date.
        updates = dict(qs.values_list('addon', 'count'))
        for chunk in chunked(updates.items(), 50):
            chunk = dict(chunk)
            for addon in Addon.objects.filter(id__in=chunk):
                current_version = {
                    'id': addon.current_version.pk,
                    'version': addon.current_version.version,
                }
                doc = docs[addon.id]
                doc.update(id=addon.id, slug=addon.slug, guid=addon.guid,
                           binary=addon.binary_components,
                           name=unicode(addon.name), created=addon.created,
                           current_version=current_version)
                doc['count'] = chunk[addon.id]
                doc.setdefault('top_95',
                               defaultdict(lambda: defaultdict(dict)))
                doc.setdefault('top_95_all', {})
                doc.setdefault('usage', {})[app.id] = updates[addon.id]
                doc.setdefault('works', {}).setdefault(app.id, {})

                # Populate with default counts for all app versions.
                for ver in versions:
                    doc['works'][app.id][vint(ver['main'])] = {
                        'success': 0,
                        'failure': 0,
                        'total': 0,
                        'failure_ratio': 0.0,
                    }

                # Group reports by `major`.`minor` app version.
                reports = (CompatReport.objects
                           .filter(guid=addon.guid, app_guid=app.guid)
                           .values_list('app_version', 'works_properly')
                           .annotate(Count('id')))
                for ver, works_properly, cnt in reports:
                    ver = vint(floor_version(ver))
                    major = [v['main'] for v in versions
                             if vint(v['previous']) < ver <= vint(v['main'])]
                    if major:
                        w = doc['works'][app.id][vint(major[0])]
                        # Tally number of success and failure reports.
                        w['success' if works_properly else 'failure'] += cnt
                        w['total'] += cnt
                        # Calculate % of incompatibility reports.
                        w['failure_ratio'] = w['failure'] / float(w['total'])

                # Without compat info for this app there is nothing to record
                # under 'support' / 'max_version'.
                if app not in addon.compatible_apps:
                    continue
                compat = addon.compatible_apps[app]
                d = {'min': compat.min.version_int,
                     'max': compat.max.version_int}
                doc.setdefault('support', {})[app.id] = d
                doc.setdefault('max_version', {})[app.id] = compat.max.version

        total = sum(updates.values())
        # Remember the total so we can show % of usage later.
        compat_total, created = CompatTotals.objects.safer_get_or_create(
            app=app.id,
            defaults={'total': total})
        if not created:
            compat_total.update(total=total)

        # Figure out which add-ons are in the top 95% for this app.
        running_total = 0
        for addon, count in sorted(updates.items(), key=lambda x: x[1],
                                   reverse=True):
            running_total += count
            docs[addon]['top_95_all'][app.id] = running_total < (.95 * total)

    # Mark the top 95% of add-ons compatible with the previous version for each
    # app + version combo.
    for compat in amo.COMPAT:
        app, ver = compat['app'], vint(compat['previous'])
        # Find all the docs that have a max_version compatible with ver.
        supported = [compat_doc for compat_doc in docs.values()
                     if (app in compat_doc.get('support', {}) and
                         compat_doc['support'][app]['max'] >= ver)]
        # Sort by count so we can get the top 95% most-used add-ons.
        supported = sorted(supported, key=lambda d: d['count'], reverse=True)
        total = sum(doc['count'] for doc in supported)
        # Figure out which add-ons are in the top 95% for this app + version.
        running_total = 0
        for doc in supported:
            running_total += doc['count']
            doc['top_95'][app][ver] = running_total < (.95 * total)

    # Send it all to the index.
    for chunk in chunked(docs.values(), 150):
        for doc in chunk:
            for index in indices:
                AppCompat.index(doc, id=doc['id'], refresh=False, index=index)
    # Documents were indexed with refresh=False; refresh once at the end.
    es = amo_search.get_es()
    es.indices.refresh()
Ejemplo n.º 28
0
def compatibility_report(index=None):
    """Build the Firefox add-on compatibility documents and bulk-index them.

    Reads the ``UpdateCount`` rows of the most recent date for add-ons
    supporting Firefox, tallies ``CompatReport`` successes and failures per
    version listed in ``FIREFOX_COMPAT``, stores the usage total in
    ``CompatTotals`` and flags the add-ons making up the top 95% of usage.
    One bulk action per document and per distinct index returned by
    ``get_indices(index)`` is sent to ES, then the indices are refreshed.
    """
    # One document per add-on id.
    docs = defaultdict(dict)
    indices = get_indices(index)

    # Gather all the data for the index.
    log.info(u'Generating Firefox compat report.')
    latest = UpdateCount.objects.aggregate(d=Max('date'))['d']
    qs = UpdateCount.objects.filter(addon__appsupport__app=amo.FIREFOX.id,
                                    addon__disabled_by_user=False,
                                    addon__status__in=amo.VALID_ADDON_STATUSES,
                                    addon___current_version__isnull=False,
                                    date=latest)

    # Maps add-on id -> update count for the latest date.
    updates = dict(qs.values_list('addon', 'count'))
    for chunk in chunked(updates.items(), 50):
        chunk = dict(chunk)
        for addon in Addon.objects.filter(id__in=chunk):
            if (amo.FIREFOX not in addon.compatible_apps or
                    addon.compatible_apps[amo.FIREFOX] is None):
                # Ignore this add-on if it does not have compat information
                # for Firefox.
                continue

            current_version = {
                'id': addon.current_version.pk,
                'version': addon.current_version.version,
            }
            doc = docs[addon.id]
            doc.update(id=addon.id, slug=addon.slug, guid=addon.guid,
                       binary=addon.binary_components,
                       name=unicode(addon.name), created=addon.created,
                       current_version=current_version)
            doc['count'] = chunk[addon.id]
            doc['usage'] = updates[addon.id]
            doc['top_95'] = {}

            # Populate with default counts for all versions.
            doc['works'] = {vint(version['main']): {
                'success': 0,
                'failure': 0,
                'total': 0,
                'failure_ratio': 0.0,
            } for version in FIREFOX_COMPAT}

            # Group reports by `major`.`minor` app version.
            reports = (CompatReport.objects
                       .filter(guid=addon.guid, app_guid=amo.FIREFOX.guid)
                       .values_list('app_version', 'works_properly')
                       .annotate(Count('id')))
            for ver, works_properly, cnt in reports:
                ver = vint(floor_version(ver))
                major = [v['main'] for v in FIREFOX_COMPAT
                         if vint(v['previous']) < ver <= vint(v['main'])]
                if major:
                    w = doc['works'][vint(major[0])]
                    # Tally number of success and failure reports.
                    w['success' if works_properly else 'failure'] += cnt
                    w['total'] += cnt
                    # Calculate % of incompatibility reports.
                    w['failure_ratio'] = w['failure'] / float(w['total'])

            compat = addon.compatible_apps[amo.FIREFOX]
            doc['support'] = {'min': compat.min.version_int,
                              'max': compat.max.version_int}
            doc['max_version'] = compat.max.version

    total = sum(updates.values())
    # Remember the total so we can show % of usage later.
    compat_total, created = CompatTotals.objects.safer_get_or_create(
        defaults={'total': total})
    if not created:
        compat_total.update(total=total)

    # Figure out which add-ons are in the top 95%.
    running_total = 0
    for addon, count in sorted(updates.items(), key=lambda x: x[1],
                               reverse=True):
        # Ignore the updates we skipped because of bad app compatibility.
        if addon in docs:
            running_total += count
            docs[addon]['top_95_all'] = running_total < (.95 * total)

    # Mark the top 95% of add-ons compatible with the previous version for each
    # version.
    for compat in FIREFOX_COMPAT:
        version = vint(compat['previous'])
        # Find all the docs that have a max_version compatible with version.
        supported = [compat_doc for compat_doc in docs.values()
                     if compat_doc['support']['max'] >= version]
        # Sort by count so we can get the top 95% most-used add-ons.
        supported = sorted(supported, key=lambda d: d['count'], reverse=True)
        total = sum(doc['count'] for doc in supported)
        # Figure out which add-ons are in the top 95% for this app + version.
        running_total = 0
        for doc in supported:
            running_total += doc['count']
            doc['top_95'][version] = running_total < (.95 * total)

    # Send it all to ES.
    bulk = []
    for id_, doc in docs.items():
        # set() drops duplicate index names so each doc is queued once per
        # distinct index.
        for index in set(indices):
            bulk.append({
                "_source": doc,
                "_id": id_,
                "_type": AppCompat.get_mapping_type(),
                "_index": index or AppCompat._get_index(),
            })

    es = amo_search.get_es()
    log.info('Bulk indexing %s compat docs on %s indices' % (
             len(docs), len(indices)))
    elasticsearch.helpers.bulk(es, bulk, chunk_size=150)
    es.indices.refresh()
Ejemplo n.º 29
0
def compatibility_report(index=None):
    """Build the Firefox compatibility report and bulk index it into ES.

    For every add-on that has an update count on the most recent
    ``UpdateCount`` date and carries Firefox compatibility information, a
    document is assembled holding its usage count, its supported version
    range, per-version tallies of compat reports (success / failure / total /
    failure ratio) and "top 95%" usage flags. The overall usage total is
    stored through ``CompatTotals`` and every document is then sent to each
    target index with a single bulk call, followed by an index refresh.

    Options:

    - index: forwarded to ``get_indices()`` to determine the target indices.
    Falsy entries in the result fall back to ``AppCompat._get_index()``.
    """
    docs = defaultdict(dict)
    indices = get_indices(index)

    # Gather all the data for the index.
    log.info(u'Generating Firefox compat report.')
    latest = UpdateCount.objects.aggregate(d=Max('date'))['d']
    update_counts = UpdateCount.objects.filter(
        addon__appsupport__app=amo.FIREFOX.id,
        addon__disabled_by_user=False,
        addon__status__in=amo.VALID_ADDON_STATUSES,
        addon___current_version__isnull=False,
        date=latest)

    updates = dict(update_counts.values_list('addon', 'count'))
    for batch in chunked(updates.items(), 50):
        counts_by_id = dict(batch)
        for addon in Addon.objects.filter(id__in=counts_by_id):
            has_firefox = amo.FIREFOX in addon.compatible_apps
            if not has_firefox or addon.compatible_apps[amo.FIREFOX] is None:
                # No compat information for Firefox: leave this add-on out
                # of the report entirely.
                continue

            current_version = {
                'id': addon.current_version.pk,
                'version': addon.current_version.version,
            }
            entry = docs[addon.id]
            entry.update({
                'id': addon.id,
                'slug': addon.slug,
                'guid': addon.guid,
                'binary': addon.binary_components,
                'name': unicode(addon.name),
                'created': addon.created,
                'current_version': current_version,
                'count': counts_by_id[addon.id],
                'usage': updates[addon.id],
                'top_95': {},
            })

            # Start every known version off with zeroed counters.
            works = {}
            for firefox_version in FIREFOX_COMPAT:
                works[vint(firefox_version['main'])] = {
                    'success': 0,
                    'failure': 0,
                    'total': 0,
                    'failure_ratio': 0.0,
                }
            entry['works'] = works

            # Compat reports grouped by (app_version, works_properly), each
            # with the number of reports in that group.
            grouped_reports = (
                CompatReport.objects
                .filter(guid=addon.guid, app_guid=amo.FIREFOX.guid)
                .values_list('app_version', 'works_properly')
                .annotate(Count('id')))
            for app_version, works_properly, report_count in grouped_reports:
                floored = vint(floor_version(app_version))
                matching = [
                    candidate['main'] for candidate in FIREFOX_COMPAT
                    if vint(candidate['previous']) < floored
                    <= vint(candidate['main'])
                ]
                if not matching:
                    # The report is for a version outside every tracked
                    # (previous, main] range.
                    continue

                tally = works[vint(matching[0])]
                # Tally number of success and failure reports.
                if works_properly:
                    tally['success'] += report_count
                else:
                    tally['failure'] += report_count
                tally['total'] += report_count
                # Share of reports that flagged an incompatibility.
                tally['failure_ratio'] = (
                    tally['failure'] / float(tally['total']))

            firefox_range = addon.compatible_apps[amo.FIREFOX]
            entry['support'] = {
                'min': firefox_range.min.version_int,
                'max': firefox_range.max.version_int
            }
            entry['max_version'] = firefox_range.max.version

    grand_total = sum(updates.values())
    # Persist the total so a % of usage can be shown later.
    compat_total, created = CompatTotals.objects.safer_get_or_create(
        defaults={'total': grand_total})
    if not created:
        compat_total.update(total=grand_total)

    # Flag the add-ons that make up the top 95% of overall usage.
    overall_cutoff = .95 * grand_total
    accumulated = 0
    by_usage = sorted(updates.items(), key=lambda pair: pair[1], reverse=True)
    for addon_id, usage in by_usage:
        # Updates skipped above (bad app compatibility) have no doc.
        if addon_id not in docs:
            continue
        accumulated += usage
        docs[addon_id]['top_95_all'] = accumulated < overall_cutoff

    # For each version, flag the top 95% of add-ons compatible with the
    # previous version.
    for compat in FIREFOX_COMPAT:
        previous = vint(compat['previous'])
        # Keep the docs whose max supported version reaches `previous`,
        # most-used first.
        supported = sorted(
            (candidate for candidate in docs.values()
             if candidate['support']['max'] >= previous),
            key=lambda candidate: candidate['count'],
            reverse=True)
        version_cutoff = .95 * sum(item['count'] for item in supported)
        accumulated = 0
        for item in supported:
            accumulated += item['count']
            item['top_95'][previous] = accumulated < version_cutoff

    # Send it all to ES: one action per (document, distinct index) pair.
    actions = [
        {
            "_source": doc,
            "_id": doc_id,
            "_type": AppCompat.get_mapping_type(),
            "_index": target or AppCompat._get_index(),
        }
        for doc_id, doc in docs.items()
        for target in set(indices)
    ]

    es = amo_search.get_es()
    log.info('Bulk indexing %s compat docs on %s indices' %
             (len(docs), len(indices)))
    elasticsearch.helpers.bulk(es, actions, chunk_size=150)
    es.indices.refresh()