def update_downloads(ids, **kw):
    client = get_monolith_client()
    today = datetime.date.today()
    count = 0

    for app in Webapp.objects.filter(id__in=ids).no_transforms():

        kwargs = {'app-id': app.id}

        # Get weekly downloads.
        #
        # If we query monolith with interval=week and the past 7 days
        # crosses a Monday, Monolith splits the counts into two. We want
        # the sum over the past week so we need to `sum` these.
        try:
            weekly = sum(
                c['count'] for c in
                client('app_installs', days_ago(7).date(), today, 'week',
                       **kwargs)
                if c.get('count'))
        except ValueError as e:
            task_log.info('Call to ES failed: {0}'.format(e))
            weekly = 0

        # Get total downloads.
        #
        # The monolith client lib doesn't handle this for us so we send a raw
        # ES query to Monolith.
        query = {'query': {'match_all': {}},
                 'facets': {
                     'installs': {
                         'statistical': {'field': 'app_installs'},
                         'facet_filter': {'term': kwargs}}},
                 'size': 0}
        try:
            resp = client.raw(query)
            total = resp.get('facets', {}).get('installs', {}).get('total', 0)
        except Exception as e:
            task_log.info('Call to ES failed: {0}'.format(e))
            total = 0

        # Update Webapp object, if needed.
        update = False
        signal = False
        if weekly != app.weekly_downloads:
            update = True
            signal = True
        if total != app.total_downloads:
            update = True

        if update:
            # Note: Calling `update` will trigger a reindex on the app if
            # `_signal` is True. Since we only index `weekly_downloads`, we
            # can skip reindexing if this hasn't changed.
            count += 1
            app.update(weekly_downloads=weekly, total_downloads=total,
                       _signal=signal)

    task_log.info('App downloads updated for %s out of %s apps.' %
                  (count, len(ids)))
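These tasks lean on a `days_ago` helper that most variants never define (a few below bind it inline as a lambda). A minimal sketch of the assumed module-level helper; the name matches the call sites, but its real home is an assumption:

import datetime

def days_ago(days):
    # Assumed helper: a datetime `days` days before now, matching how the
    # tasks call it (sometimes followed by `.date()`).
    return datetime.datetime.today() - datetime.timedelta(days=days)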
def get(self, request, metric):
    if metric not in STATS:
        raise http.Http404('No metric by that name.')

    if not waffle.switch_is_active('stats-api'):
        raise NotImplementedError('Stats not enabled for this host.')

    # Perform form validation.
    form = GlobalStatsForm(request.GET)
    if not form.is_valid():
        raise ParseError(dict(form.errors.items()))

    data = form.cleaned_data
    client = get_monolith_client()

    try:
        metric_data = list(client(STATS[metric]['metric'], data.get('start'),
                                  data.get('end'), data.get('interval')))
    except ValueError:
        # This occurs if monolith doesn't have our metric and we get an
        # elasticsearch SearchPhaseExecutionException error.
        log.info('Monolith ValueError for metric {0}'.format(
            STATS[metric]['metric']))
        raise ParseError('Invalid metric at this time. Try again later.')

    return Response({'objects': metric_data})
def get_client(self):
    try:
        client = get_monolith_client()
    except requests.ConnectionError as e:
        log.info('Monolith connection error: {0}'.format(e))
        raise ServiceUnavailable
    return client
def _monolith_site_query(period, start, end, field):
    fields = {
        'mmo_total_visitors': 'visits',
        'apps_count_installed': 'app_installs',
        'apps_review_count_new': 'review_count',
        'mmo_user_count_new': 'new_user_count',
        'apps_count_new': 'app_count',
        'mmo_user_count_total': 'total_user_count'
    }

    # Getting data from the monolith server.
    client = get_monolith_client()

    if period == 'date':
        period = 'day'

    def _get_data():
        for result in client(fields[field], start, end, interval=period):
            yield {
                'date': result['date'].strftime('%Y-%m-%d'),
                'data': {
                    field: result['count']
                }
            }

    try:
        return list(_get_data()), _CACHED_KEYS
    except ValueError as e:
        if len(e.args) > 0:
            logger.error(e.args[0])
        return [], _CACHED_KEYS
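For context: the monolith client behaves as a callable that yields one dict per interval bucket, each with a datetime under 'date' and a numeric 'count'. A minimal illustration of the shape `_get_data` consumes and emits; the sample values are invented:

import datetime

# Hypothetical bucket as yielded by the monolith client.
raw = {'date': datetime.datetime(2014, 1, 6), 'count': 42}

# What _get_data() yields for field='mmo_total_visitors':
row = {'date': raw['date'].strftime('%Y-%m-%d'),  # '2014-01-06'
       'data': {'mmo_total_visitors': raw['count']}}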
def _monolith_site_query(period, start, end, field):
    fields = {'mmo_total_visitors': 'visits',
              'apps_count_installed': 'app_installs',
              'apps_review_count_new': 'review_count',
              'mmo_user_count_new': 'user_count',
              'apps_count_new': 'app_count',
              'mmo_user_count_total': 'total_user_count'}

    # Getting data from the monolith server.
    client = get_monolith_client()

    if period == 'date':
        period = 'day'

    # The start date is not included in the range.
    # The end date is included.
    start = start + timedelta(days=1)

    def _get_data():
        for result in client(fields[field], start, end, interval=period,
                             strict_range=False):
            yield {'date': result['date'].strftime('%Y-%m-%d'),
                   'data': {field: result['count']}}

    try:
        return list(_get_data()), _CACHED_KEYS
    except ValueError as e:
        if len(e.args) > 0:
            logger.error(e.args[0])
        return [], _CACHED_KEYS
def get(self, request, metric): if metric not in STATS: raise http.Http404("No metric by that name.") if not waffle.switch_is_active("stats-api"): raise NotImplemented("Stats not enabled for this host.") # Perform form validation. form = GlobalStatsForm(request.GET) if not form.is_valid(): raise ParseError(dict(form.errors.items())) data = form.cleaned_data client = get_monolith_client() try: metric_data = list( client(STATS[metric]["metric"], data.get("start"), data.get("end"), data.get("interval")) ) except ValueError: # This occurs if monolith doesn't have our metric and we get an # elasticsearch SearchPhaseExecutionException error. log.info("Monolith ValueError for metric {0}".format(STATS[metric]["metric"])) raise ParseError("Invalid metric at this time. Try again later.") return Response({"objects": metric_data})
def _get_monolith_data(stat, start, end, interval, dimensions):
    # If stat has a 'lines' attribute, it's a multi-line graph. Do a
    # request for each item in 'lines' and compose them in a single
    # response.
    try:
        client = get_monolith_client()
    except requests.ConnectionError as e:
        log.info('Monolith connection error: {0}'.format(e))
        raise ServiceUnavailable

    try:
        data = {}

        if 'lines' in stat:
            for line_name, line_dimension in stat['lines'].items():
                dimensions.update(line_dimension)
                data[line_name] = list(client(stat['metric'], start, end,
                                              interval, **dimensions))

        else:
            data['objects'] = list(client(stat['metric'], start, end,
                                          interval, **dimensions))

    except ValueError as e:
        # This occurs if monolith doesn't have our metric and we get an
        # elasticsearch SearchPhaseExecutionException error.
        log.info('Monolith ValueError for metric {0}: {1}'.format(
            stat['metric'], e))
        raise ParseError('Invalid metric at this time. Try again later.')

    return data
def _get_monolith_data(stat, start, end, interval, dimensions):
    # If stat has a 'lines' attribute, it's a multi-line graph. Do a
    # request for each item in 'lines' and compose them in a single
    # response.
    client = get_monolith_client()

    try:
        data = {}

        if 'lines' in stat:
            for line_name, line_dimension in stat['lines'].items():
                dimensions.update(line_dimension)
                data[line_name] = list(
                    client(stat['metric'], start, end, interval,
                           **dimensions))

        else:
            data['objects'] = list(
                client(stat['metric'], start, end, interval, **dimensions))

    except ValueError as e:
        # This occurs if monolith doesn't have our metric and we get an
        # elasticsearch SearchPhaseExecutionException error.
        log.info('Monolith ValueError for metric {0}: {1}'.format(
            stat['metric'], e))
        raise ParseError('Invalid metric at this time. Try again later.')

    return data
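A sketch of the kind of `stat` definition the 'lines' branch handles; the metric name and dimension values here are invented for illustration, not taken from the real STATS registry:

# Hypothetical multi-line stat: one monolith request per line, with each
# line's extra dimension merged into the shared `dimensions` kwargs.
stat = {
    'metric': 'apps_added_by_package_type',  # invented metric name
    'lines': {'free': {'package_type': 'free'},
              'paid': {'package_type': 'paid'}},
}
# _get_monolith_data(stat, start, end, 'day', {}) would then return
# {'free': [...], 'paid': [...]} instead of {'objects': [...]}.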
def get(self, request, pk):
    app = self.get_object()

    try:
        client = get_monolith_client()
    except requests.ConnectionError as e:
        log.info('Monolith connection error: {0}'.format(e))
        raise ServiceUnavailable

    # Note: We have to do this as separate requests so that if one fails
    # the rest can still be returned.
    data = {}
    for metric, stat in APP_STATS_TOTAL.items():
        data[metric] = {}
        query = {
            'query': {
                'match_all': {}
            },
            'facets': {
                metric: {
                    'statistical': {
                        'field': stat['metric']
                    },
                    'facet_filter': {
                        'term': {
                            'app-id': app.id
                        }
                    }
                }
            },
            'size': 0
        }

        try:
            resp = client.raw(query)
        except ValueError as e:
            log.info('Received value error from monolith client: %s' % e)
            continue

        for metric, facet in resp.get('facets', {}).items():
            count = facet.get('count', 0)
            # We filter out facets with count=0 to avoid returning things
            # like `'max': u'-Infinity'`.
            if count > 0:
                for field in ('max', 'mean', 'min', 'std_deviation',
                              'sum_of_squares', 'total', 'variance'):
                    value = facet.get(field)
                    if value is not None:
                        data[metric][field] = value

    return Response(data)
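The parsing loop above expects elasticsearch's statistical-facet response shape; a sample of what `client.raw` might return for one facet, with invented values:

# Hypothetical raw response for a statistical facet named 'installs'.
resp = {
    'facets': {
        'installs': {
            '_type': 'statistical',
            'count': 14,      # matched docs; 0 means no data for the app
            'total': 1200.0,  # the field update_downloads reads
            'min': 10.0, 'max': 400.0, 'mean': 85.7,
            'sum_of_squares': 530000.0, 'variance': 30500.0,
            'std_deviation': 174.6,
        }
    }
}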
def _get_trending(app_id, region=None):
    """
    Calculate trending.

    a = installs from 7 days ago to now
    b = installs from 28 days ago to 8 days ago, averaged per week
    trending = (a - b) / b if a > 100 and b > 1 else 0

    """
    client = get_monolith_client()

    kwargs = {"app-id": app_id}
    if region:
        kwargs["region"] = region.slug

    today = datetime.datetime.today()

    # If we query monolith with interval=week and the past 7 days
    # crosses a Monday, Monolith splits the counts into two. We want
    # the sum over the past week so we need to `sum` these.
    try:
        count_1 = sum(
            c["count"] for c in
            client("app_installs", days_ago(7), today, "week", **kwargs)
            if c.get("count")
        )
    except ValueError as e:
        task_log.info("Call to ES failed: {0}".format(e))
        count_1 = 0

    # If count_1 isn't more than 100, stop here to avoid extra Monolith calls.
    if not count_1 > 100:
        return 0.0

    # Get the average installs for the prior 3 weeks. Don't use the `len` of
    # the returned counts because of week boundaries.
    try:
        count_3 = (
            sum(
                c["count"] for c in
                client("app_installs", days_ago(28), days_ago(8), "week",
                       **kwargs)
                if c.get("count")
            ) / 3
        )
    except ValueError as e:
        task_log.info("Call to ES failed: {0}".format(e))
        count_3 = 0

    if count_3 > 1:
        return (count_1 - count_3) / count_3
    else:
        return 0.0
def get(self, request, metric):
    if metric not in STATS:
        raise http.Http404('No metric by that name.')

    if not waffle.switch_is_active('stats-api'):
        raise NotImplementedError('Stats not enabled for this host.')

    stat = STATS[metric]

    # Perform form validation.
    form = GlobalStatsForm(request.GET)
    if not form.is_valid():
        raise ParseError(dict(form.errors.items()))

    qs = form.cleaned_data

    client = get_monolith_client()

    dimensions = {}
    if 'dimensions' in stat:
        for key, default in stat['dimensions'].items():
            val = request.GET.get(key, default)
            if val is not None:
                # Avoid passing kwargs to the monolith client when the
                # dimension is None to avoid facet filters being applied.
                dimensions[key] = val

    # If stat has a 'lines' attribute, it's a multi-line graph. Do a
    # request for each item in 'lines' and compose them in a single
    # response.
    try:
        data = {}

        if 'lines' in stat:
            for line_name, line_dimension in stat['lines'].items():
                dimensions.update(line_dimension)
                data[line_name] = list(
                    client(stat['metric'], qs.get('start'), qs.get('end'),
                           qs.get('interval'), **dimensions))

        else:
            data['objects'] = list(
                client(stat['metric'], qs.get('start'), qs.get('end'),
                       qs.get('interval'), **dimensions))

    except ValueError as e:
        # This occurs if monolith doesn't have our metric and we get an
        # elasticsearch SearchPhaseExecutionException error.
        log.info('Monolith ValueError for metric {0}: {1}'.format(
            stat['metric'], e))
        raise ParseError('Invalid metric at this time. Try again later.')

    return Response(data)
def _get_trending(app_id, region=None): """ Calculate trending. a = installs from 7 days ago to now b = installs from 28 days ago to 8 days ago, averaged per week trending = (a - b) / b if a > 100 and b > 1 else 0 """ client = get_monolith_client() kwargs = {'app-id': app_id} if region: kwargs['region'] = region.slug today = datetime.datetime.today() days_ago = lambda d: today - datetime.timedelta(days=d) # If we query monolith with interval=week and the past 7 days # crosses a Monday, Monolith splits the counts into two. We want # the sum over the past week so we need to `sum` these. try: count_1 = sum( c['count'] for c in client('app_installs', days_ago(7), today, 'week', **kwargs) if c.get('count')) except ValueError as e: task_log.info('Call to ES failed: {0}'.format(e)) count_1 = 0 # If count_1 isn't more than 100, stop here to avoid extra Monolith calls. if not count_1 > 100: return 0.0 # Get the average installs for the prior 3 weeks. Don't use the `len` of # the returned counts because of week boundaries. try: count_3 = sum( c['count'] for c in client('app_installs', days_ago(28), days_ago(8), 'week', **kwargs) if c.get('count')) / 3 except ValueError as e: task_log.info('Call to ES failed: {0}'.format(e)) count_3 = 0 if count_3 > 1: return (count_1 - count_3) / count_3 else: return 0.0
def _get_trending(app_id, region=None): """ Calculate trending. a = installs from 7 days ago to now b = installs from 28 days ago to 8 days ago, averaged per week trending = (a - b) / b if a > 100 and b > 1 else 0 """ client = get_monolith_client() kwargs = {'app-id': app_id} if region: kwargs['region'] = region.slug today = datetime.datetime.today() days_ago = lambda d: today - datetime.timedelta(days=d) # If we query monolith with interval=week and the past 7 days # crosses a Monday, Monolith splits the counts into two. We want # the sum over the past week so we need to `sum` these. try: count_1 = sum( c['count'] for c in client('app_installs', days_ago(7), today, 'week', **kwargs) if c.get('count')) except ValueError as e: log.info('Call to ES failed: {0}'.format(e)) count_1 = 0 # Get the average installs for the prior 3 weeks. Don't use the `len` of # the returned counts because of week boundaries. try: count_3 = sum( c['count'] for c in client('app_installs', days_ago(28), days_ago(8), 'week', **kwargs) if c.get('count')) / 3 except ValueError as e: log.info('Call to ES failed: {0}'.format(e)) count_3 = 0 if count_1 > 100 and count_3 > 1: return (count_1 - count_3) / count_3 else: return 0.0
def get_detail(self, request, **kwargs):
    metric = kwargs.get('metric')
    if metric not in STATS:
        raise ImmediateHttpResponse(response=http.HttpNotFound())

    # Trigger form validation which doesn't normally happen for GETs.
    bundle = self.build_bundle(data=request.GET, request=request)
    self.is_valid(bundle, request)

    start = bundle.data.get('start')
    end = bundle.data.get('end')
    interval = bundle.data.get('interval')

    client = get_monolith_client()
    data = list(client(STATS[metric]['metric'], start, end, interval))
    to_be_serialized = self.alter_list_data_to_serialize(
        request, {'objects': data})
    return self.create_response(request, to_be_serialized)
def _get_monolith_data(stat, start, end, interval, dimensions):
    # If stat has a 'lines' attribute, it's a multi-line graph. Do a
    # request for each item in 'lines' and compose them in a single
    # response.
    try:
        client = get_monolith_client()
    except requests.ConnectionError as e:
        log.info('Monolith connection error: {0}'.format(e))
        raise ServiceUnavailable

    def _coerce(data):
        for key, coerce in stat.get('coerce', {}).items():
            if data.get(key):
                data[key] = coerce(data[key])
        return data

    try:
        data = {}

        if 'lines' in stat:
            for line_name, line_dimension in stat['lines'].items():
                dimensions.update(line_dimension)
                data[line_name] = map(
                    _coerce, client(stat['metric'], start, end, interval,
                                    **dimensions))

        else:
            data['objects'] = map(
                _coerce, client(stat['metric'], start, end, interval,
                                **dimensions))

    except ValueError as e:
        # This occurs if monolith doesn't have our metric and we get an
        # elasticsearch SearchPhaseExecutionException error.
        log.info('Monolith ValueError for metric {0}: {1}'.format(
            stat['metric'], e))
        raise ParseError('Invalid metric at this time. Try again later.')

    return data
def _get_trending(app_id, region=None): """ Calculate trending. a = installs from 7 days ago to now b = installs from 28 days ago to 8 days ago, averaged per week trending = (a - b) / b if a > 100 and b > 1 else 0 """ client = get_monolith_client() kwargs = {'app-id': app_id} if region: kwargs['region'] = region.slug today = datetime.datetime.today() days_ago = lambda d: today - datetime.timedelta(days=d) # If we query monolith with interval=week and the past 7 days # crosses a Monday, Monolith splits the counts into two. We want # the sum over the past week so we need to `sum` these. count_1 = sum( c['count'] for c in client('app_installs', days_ago(7), today, 'week', **kwargs)) # Get the average installs for the prior 3 weeks. Don't use the `len` of # the returned counts because of week boundaries. counts_3 = list( client('app_installs', days_ago(28), days_ago(8), 'week', **kwargs)) count_3 = sum(c['count'] for c in counts_3) / 3 if count_1 > 100 and count_3 > 1: return (count_1 - count_3) / count_3 else: return 0.0
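A quick worked example of the trending formula shared by the variants above, using invented figures:

# a: installs over the past week; b: weekly average over the prior
# three weeks (hypothetical numbers).
count_1 = 350.0
count_3 = (90 + 120 + 90) / 3.0  # 100.0

# count_1 > 100 and count_3 > 1, so the app scores:
trending = (count_1 - count_3) / count_3  # (350 - 100) / 100 == 2.5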
def get_series_line(model, group, primary_field=None, extra_fields=None, extra_values=None, **filters): """ Get a generator of dicts for the stats model given by the filters, made to fit into Highchart's datetime line graph. primary_field takes a field name that can be referenced by the key 'count' extra_fields takes a list of fields that can be found in the index on top of date and count and can be seen in the output extra_values is a list of constant values added to each line """ if not extra_fields: extra_fields = [] extra_values = extra_values or {} if waffle.switch_is_active('monolith-stats'): keys = {Installed: 'app_installs', UpdateCount: 'updatecount_XXX', Contribution: 'contribution_XXX', InappPayment: 'inapppayment_XXX'} # Getting data from the monolith server. client = get_monolith_client() field = keys[model] start, end = filters['date__range'] if group == 'date': group = 'day' try: for result in client(field, start, end, interval=group, addon_id=filters['addon']): res = {'count': result['count']} for extra_field in extra_fields: res[extra_field] = result[extra_field] date_ = date(*result['date'].timetuple()[:3]) res['end'] = res['date'] = date_ res.update(extra_values) yield res except ValueError as e: if len(e.args) > 0: logger.error(e.args[0]) else: # Pull data out of ES data = list((model.search().order_by('-date').filter(**filters) .values_dict('date', 'count', primary_field, *extra_fields))[:365]) # Pad empty data with dummy dicts. days = [datum['date'].date() for datum in data] fields = [] if primary_field: fields.append(primary_field) if extra_fields: fields += extra_fields data += pad_missing_stats(days, group, filters.get('date__range'), fields) # Sort in descending order. data = sorted(data, key=lambda document: document['date'], reverse=True) # Generate dictionary with options from ES document for val in data: # Convert the datetimes to a date. date_ = date(*val['date'].timetuple()[:3]) if primary_field and primary_field != 'count': rv = dict(count=val[primary_field], date=date_, end=date_) else: rv = dict(count=val['count'], date=date_, end=date_) for extra_field in extra_fields: rv[extra_field] = val[extra_field] rv.update(extra_values) yield rv
def update_downloads(ids, **kw):
    client = get_monolith_client()
    count = 0

    for app in Webapp.objects.filter(id__in=ids).no_transforms():

        appid = {'app-id': app.id}

        # Get weekly downloads.
        query = {
            'query': {
                'match_all': {}
            },
            'facets': {
                'installs': {
                    'date_histogram': {
                        'value_field': 'app_installs',
                        'interval': 'week',
                        'key_field': 'date',
                    },
                    'facet_filter': {
                        'and': [{
                            'term': appid
                        }, {
                            'range': {
                                'date': {
                                    'gte': days_ago(8).date().strftime(
                                        '%Y-%m-%d'),
                                    'lte': days_ago(1).date().strftime(
                                        '%Y-%m-%d'),
                                }
                            }
                        }]
                    }
                }
            },
            'size': 0
        }

        try:
            resp = client.raw(query)
            # If we query monolith with interval=week and the past 7 days
            # crosses a Monday, Monolith splits the counts into two. We want
            # the sum over the past week so we need to `sum` these.
            weekly = sum(c['total'] for c in resp.get('facets', {}).get(
                'installs', {}).get('entries') if c.get('total'))
        except Exception as e:
            task_log.info('Call to ES failed: {0}'.format(e))
            weekly = 0

        # Get total downloads.
        query = {
            'query': {
                'match_all': {}
            },
            'facets': {
                'installs': {
                    'statistical': {
                        'field': 'app_installs'
                    },
                    'facet_filter': {
                        'term': appid
                    }
                }
            },
            'size': 0
        }

        try:
            resp = client.raw(query)
            total = resp.get('facets', {}).get('installs', {}).get('total', 0)
        except Exception as e:
            task_log.info('Call to ES failed: {0}'.format(e))
            total = 0

        # Update Webapp object, if needed.
        update = False
        signal = False
        if weekly != app.weekly_downloads:
            update = True
            signal = True
        if total != app.total_downloads:
            update = True

        if update:
            # Note: Calling `update` will trigger a reindex on the app if
            # `_signal` is True. Since we only index `weekly_downloads`, we
            # can skip reindexing if this hasn't changed.
            count += 1
            app.update(weekly_downloads=weekly, total_downloads=total,
                       _signal=signal)

    task_log.info('App downloads updated for %s out of %s apps.' %
                  (count, len(ids)))
def update_downloads(ids, **kw):
    client = get_monolith_client()
    count = 0

    for app in Webapp.objects.filter(id__in=ids).no_transforms():

        appid = {'app-id': app.id}

        # Get weekly downloads.
        query = {
            'query': {'match_all': {}},
            'facets': {
                'installs': {
                    'date_histogram': {
                        'value_field': 'app_installs',
                        'interval': 'week',
                        'key_field': 'date',
                    },
                    'facet_filter': {
                        'and': [
                            {'term': appid},
                            {'range': {'date': {
                                'gte': days_ago(8).date().strftime(
                                    '%Y-%m-%d'),
                                'lte': days_ago(1).date().strftime(
                                    '%Y-%m-%d'),
                            }}}
                        ]
                    }
                }
            },
            'size': 0}

        try:
            resp = client.raw(query)
            # If we query monolith with interval=week and the past 7 days
            # crosses a Monday, Monolith splits the counts into two. We want
            # the sum over the past week so we need to `sum` these.
            weekly = sum(
                c['total'] for c in
                resp.get('facets', {}).get('installs', {}).get('entries')
                if c.get('total'))
        except Exception as e:
            task_log.info('Call to ES failed: {0}'.format(e))
            weekly = 0

        # Get total downloads.
        query = {'query': {'match_all': {}},
                 'facets': {
                     'installs': {
                         'statistical': {'field': 'app_installs'},
                         'facet_filter': {'term': appid}}},
                 'size': 0}
        try:
            resp = client.raw(query)
            total = resp.get('facets', {}).get('installs', {}).get('total', 0)
        except Exception as e:
            task_log.info('Call to ES failed: {0}'.format(e))
            total = 0

        # Update Webapp object, if needed.
        update = False
        signal = False
        if weekly != app.weekly_downloads:
            update = True
            signal = True
        if total != app.total_downloads:
            update = True

        if update:
            # Note: Calling `update` will trigger a reindex on the app if
            # `_signal` is True. Since we only index `weekly_downloads`, we
            # can skip reindexing if this hasn't changed.
            count += 1
            app.update(weekly_downloads=weekly, total_downloads=total,
                       _signal=signal)

    task_log.info('App downloads updated for %s out of %s apps.' %
                  (count, len(ids)))
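The `weekly` sum in the variants above walks a date_histogram facet's `entries`; a sample of the response shape `client.raw` is expected to return, with invented values:

# Hypothetical histogram response: one entry per week bucket, where
# 'total' is the sum of 'app_installs' within that bucket.
resp = {
    'facets': {
        'installs': {
            '_type': 'date_histogram',
            'entries': [
                {'time': 1388966400000, 'count': 3, 'total': 120.0},
                {'time': 1389571200000, 'count': 2, 'total': 80.0},
            ]
        }
    }
}

weekly = sum(c['total'] for c in
             resp.get('facets', {}).get('installs', {}).get('entries')
             if c.get('total'))  # 200.0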
def _get_installs(app_id): """ Calculate popularity of app for all regions and per region. Returns value in the format of:: {'all': <global installs>, <region_slug>: <regional installs>, ...} """ # How many days back do we include when calculating popularity. POPULARITY_PERIOD = 90 client = get_monolith_client() popular = { 'filter': { 'range': { 'date': { 'gte': days_ago(POPULARITY_PERIOD).date().isoformat(), 'lte': days_ago(1).date().isoformat() } } }, 'aggs': { 'total_installs': { 'sum': { 'field': 'app_installs' } } } } query = { 'query': { 'filtered': { 'query': {'match_all': {}}, 'filter': {'term': {'app-id': app_id}} } }, 'aggregations': { 'popular': popular, 'region': { 'terms': { 'field': 'region', # Add size so we get all regions, not just the top 10. 'size': len(mkt.regions.ALL_REGIONS) }, 'aggregations': { 'popular': popular } } }, 'size': 0 } try: res = client.raw(query) except ValueError as e: task_log.error('Error response from Monolith: {0}'.format(e)) return {} if 'aggregations' not in res: task_log.error('No installs for app {}'.format(app_id)) return {} results = { 'all': res['aggregations']['popular']['total_installs']['value'] } if 'region' in res['aggregations']: for regional_res in res['aggregations']['region']['buckets']: region_slug = regional_res['key'] popular = regional_res['popular']['total_installs']['value'] results[region_slug] = popular return results
def _get_trending(app_id): """ Calculate trending for app for all regions and per region. a = installs from 8 days ago to 1 day ago b = installs from 29 days ago to 9 days ago, averaged per week trending = (a - b) / b if a > 100 and b > 1 else 0 Returns value in the format of:: {'all': <global trending score>, <region_slug>: <regional trending score>, ...} """ # How many app installs are required in the prior week to be considered # "trending". Adjust this as total Marketplace app installs increases. # # Note: AMO uses 1000.0 for add-ons. PRIOR_WEEK_INSTALL_THRESHOLD = 100.0 client = get_monolith_client() week1 = { 'filter': { 'range': { 'date': { 'gte': days_ago(8).date().isoformat(), 'lte': days_ago(1).date().isoformat() } } }, 'aggs': { 'total_installs': { 'sum': { 'field': 'app_installs' } } } } week3 = { 'filter': { 'range': { 'date': { 'gte': days_ago(29).date().isoformat(), 'lte': days_ago(9).date().isoformat() } } }, 'aggs': { 'total_installs': { 'sum': { 'field': 'app_installs' } } } } query = { 'query': { 'filtered': { 'query': {'match_all': {}}, 'filter': {'term': {'app-id': app_id}} } }, 'aggregations': { 'week1': week1, 'week3': week3, 'region': { 'terms': { 'field': 'region', # Add size so we get all regions, not just the top 10. 'size': len(mkt.regions.ALL_REGIONS) }, 'aggregations': { 'week1': week1, 'week3': week3 } } }, 'size': 0 } try: res = client.raw(query) except ValueError as e: task_log.error('Error response from Monolith: {0}'.format(e)) return {} if 'aggregations' not in res: task_log.error('No installs for app {}'.format(app_id)) return {} def _score(week1, week3): # If last week app installs are < 100, this app isn't trending. if week1 < PRIOR_WEEK_INSTALL_THRESHOLD: return 0.0 score = 0.0 if week3 > 1.0: score = (week1 - week3) / week3 if score < 0.0: score = 0.0 return score # Global trending score. week1 = res['aggregations']['week1']['total_installs']['value'] week3 = res['aggregations']['week3']['total_installs']['value'] / 3.0 if week1 < PRIOR_WEEK_INSTALL_THRESHOLD: # If global installs over the last week aren't over 100, we # short-circuit and return a zero-like value as this is not a trending # app by definition. Since global installs aren't above 100, per-region # installs won't be either. return {} results = { 'all': _score(week1, week3) } if 'region' in res['aggregations']: for regional_res in res['aggregations']['region']['buckets']: region_slug = regional_res['key'] week1 = regional_res['week1']['total_installs']['value'] week3 = regional_res['week3']['total_installs']['value'] / 3.0 results[region_slug] = _score(week1, week3) return results
def update_downloads(ids, **kw): client = get_monolith_client() count = 0 for app in Webapp.objects.filter(id__in=ids).no_transforms(): appid = {"app-id": app.id} # Get weekly downloads. query = { "query": {"match_all": {}}, "facets": { "installs": { "date_histogram": {"value_field": "app_installs", "interval": "week", "key_field": "date"}, "facet_filter": { "and": [ {"term": appid}, { "range": { "date": { "gte": days_ago(8).date().strftime("%Y-%m-%d"), "lte": days_ago(1).date().strftime("%Y-%m-%d"), } } }, ] }, } }, "size": 0, } try: resp = client.raw(query) # If we query monolith with interval=week and the past 7 days # crosses a Monday, Monolith splits the counts into two. We want # the sum over the past week so we need to `sum` these. weekly = sum( c["total"] for c in resp.get("facets", {}).get("installs", {}).get("entries") if c.get("total") ) except Exception as e: task_log.info("Call to ES failed: {0}".format(e)) weekly = 0 # Get total downloads. query = { "query": {"match_all": {}}, "facets": {"installs": {"statistical": {"field": "app_installs"}, "facet_filter": {"term": appid}}}, "size": 0, } try: resp = client.raw(query) total = resp.get("facets", {}).get("installs", {}).get("total", 0) except Exception as e: task_log.info("Call to ES failed: {0}".format(e)) total = 0 # Update Webapp object, if needed. update = False signal = False if weekly != app.weekly_downloads: update = True signal = True if total != app.total_downloads: update = True if update: # Note: Calling `update` will trigger a reindex on the app if # `_signal` is True. Since we only index `weekly_downloads`, we # can skip reindexing if this hasn't changed. count += 1 app.update(weekly_downloads=weekly, total_downloads=total, _signal=signal) task_log.info("App downloads updated for %s out of %s apps." % (count, len(ids)))
def update_downloads(ids, **kw):
    client = get_monolith_client()
    today = datetime.date.today()
    count = 0

    for app in Webapp.objects.filter(id__in=ids).no_transforms():

        kwargs = {'app-id': app.id}

        # Get weekly downloads.
        #
        # If we query monolith with interval=week and the past 7 days
        # crosses a Monday, Monolith splits the counts into two. We want
        # the sum over the past week so we need to `sum` these.
        try:
            weekly = sum(
                c['count'] for c in
                client('app_installs', days_ago(7).date(), today, 'week',
                       **kwargs)
                if c.get('count'))
        except ValueError as e:
            task_log.info('Call to ES failed: {0}'.format(e))
            weekly = 0

        # Get total downloads.
        #
        # The monolith client lib doesn't handle this for us so we send a raw
        # ES query to Monolith.
        query = {
            'query': {
                'match_all': {}
            },
            'facets': {
                'installs': {
                    'statistical': {
                        'field': 'app_installs'
                    },
                    'facet_filter': {
                        'term': kwargs
                    }
                }
            },
            'size': 0
        }

        try:
            resp = client.raw(query)
            total = resp.get('facets', {}).get('installs', {}).get('total', 0)
        except Exception as e:
            task_log.info('Call to ES failed: {0}'.format(e))
            total = 0

        # Update Webapp object, if needed.
        update = False
        signal = False
        if weekly != app.weekly_downloads:
            update = True
            signal = True
        if total != app.total_downloads:
            update = True

        if update:
            # Note: Calling `update` will trigger a reindex on the app if
            # `_signal` is True. Since we only index `weekly_downloads`, we
            # can skip reindexing if this hasn't changed.
            count += 1
            app.update(weekly_downloads=weekly, total_downloads=total,
                       _signal=signal)

    task_log.info('App downloads updated for %s out of %s apps.' %
                  (count, len(ids)))
def get_series_line(model, group, primary_field=None, extra_fields=None, extra_values=None, **filters): """ Get a generator of dicts for the stats model given by the filters, made to fit into Highchart's datetime line graph. primary_field takes a field name that can be referenced by the key 'count' extra_fields takes a list of fields that can be found in the index on top of date and count and can be seen in the output extra_values is a list of constant values added to each line """ if not extra_fields: extra_fields = [] extra_values = extra_values or {} if waffle.switch_is_active('monolith-stats'): keys = { Installed: 'apps_installs', UpdateCount: 'updatecount_XXX', Contribution: 'contribution_XXX', InappPayment: 'inapppayment_XXX' } # Getting data from the monolith server. client = get_monolith_client() field = keys[model] start, end = filters['date__range'] if group == 'date': group = 'day' for result in client(field, start, end, interval=group, addon_id=filters['addon']): res = {'count': result['count']} for extra_field in extra_fields: res[extra_field] = result[extra_field] date_ = date(*result['date'].timetuple()[:3]) res['end'] = res['date'] = date_ res.update(extra_values) yield res else: # Pull data out of ES data = list( (model.search().order_by('-date').filter(**filters).values_dict( 'date', 'count', primary_field, *extra_fields))[:365]) # Pad empty data with dummy dicts. days = [datum['date'].date() for datum in data] fields = [] if primary_field: fields.append(primary_field) if extra_fields: fields += extra_fields data += pad_missing_stats(days, group, filters.get('date__range'), fields) # Sort in descending order. data = sorted(data, key=lambda document: document['date'], reverse=True) # Generate dictionary with options from ES document for val in data: # Convert the datetimes to a date. date_ = date(*val['date'].timetuple()[:3]) if primary_field and primary_field != 'count': rv = dict(count=val[primary_field], date=date_, end=date_) else: rv = dict(count=val['count'], date=date_, end=date_) for extra_field in extra_fields: rv[extra_field] = val[extra_field] rv.update(extra_values) yield rv
def _get_trending(app_id): """ Calculate trending for app for all regions and per region. a = installs from 8 days ago to 1 day ago b = installs from 29 days ago to 9 days ago, averaged per week trending = (a - b) / b if a > 100 and b > 1 else 0 Returns value in the format of:: {'all': <global trending score>, <region_slug>: <regional trending score>, ...} """ # How many app installs are required in the prior week to be considered # "trending". Adjust this as total Marketplace app installs increases. # # Note: AMO uses 1000.0 for add-ons. PRIOR_WEEK_INSTALL_THRESHOLD = 100.0 client = get_monolith_client() week1 = { 'filter': { 'range': { 'date': { 'gte': days_ago(8).date().isoformat(), 'lte': days_ago(1).date().isoformat() } } }, 'aggs': { 'total_installs': { 'sum': { 'field': 'app_installs' } } } } week3 = { 'filter': { 'range': { 'date': { 'gte': days_ago(29).date().isoformat(), 'lte': days_ago(9).date().isoformat() } } }, 'aggs': { 'total_installs': { 'sum': { 'field': 'app_installs' } } } } query = { 'query': { 'filtered': { 'query': { 'match_all': {} }, 'filter': { 'term': { 'app-id': app_id } } } }, 'aggregations': { 'week1': week1, 'week3': week3, 'region': { 'terms': { 'field': 'region', # Add size so we get all regions, not just the top 10. 'size': len(mkt.regions.ALL_REGIONS) }, 'aggregations': { 'week1': week1, 'week3': week3 } } }, 'size': 0 } try: res = client.raw(query) except ValueError as e: task_log.error('Error response from Monolith: {0}'.format(e)) return {} if 'aggregations' not in res: task_log.error('No installs for app {}'.format(app_id)) return {} def _score(week1, week3): # If last week app installs are < 100, this app isn't trending. if week1 < PRIOR_WEEK_INSTALL_THRESHOLD: return 0.0 score = 0.0 if week3 > 1.0: score = (week1 - week3) / week3 if score < 0.0: score = 0.0 return score # Global trending score. week1 = res['aggregations']['week1']['total_installs']['value'] week3 = res['aggregations']['week3']['total_installs']['value'] / 3.0 if week1 < PRIOR_WEEK_INSTALL_THRESHOLD: # If global installs over the last week aren't over 100, we # short-circuit and return a zero-like value as this is not a trending # app by definition. Since global installs aren't above 100, per-region # installs won't be either. return {} results = {'all': _score(week1, week3)} if 'region' in res['aggregations']: for regional_res in res['aggregations']['region']['buckets']: region_slug = regional_res['key'] week1 = regional_res['week1']['total_installs']['value'] week3 = regional_res['week3']['total_installs']['value'] / 3.0 results[region_slug] = _score(week1, week3) return results
def _get_installs(app_id): """ Calculate popularity of app for all regions and per region. Returns value in the format of:: {'all': <global installs>, <region_slug>: <regional installs>, ...} """ # How many days back do we include when calculating popularity. POPULARITY_PERIOD = 90 client = get_monolith_client() popular = { 'filter': { 'range': { 'date': { 'gte': days_ago(POPULARITY_PERIOD).date().isoformat(), 'lte': days_ago(1).date().isoformat() } } }, 'aggs': { 'total_installs': { 'sum': { 'field': 'app_installs' } } } } query = { 'query': { 'filtered': { 'query': { 'match_all': {} }, 'filter': { 'term': { 'app-id': app_id } } } }, 'aggregations': { 'popular': popular, 'region': { 'terms': { 'field': 'region', # Add size so we get all regions, not just the top 10. 'size': len(mkt.regions.ALL_REGIONS) }, 'aggregations': { 'popular': popular } } }, 'size': 0 } try: res = client.raw(query) except ValueError as e: task_log.error('Error response from Monolith: {0}'.format(e)) return {} if 'aggregations' not in res: task_log.error('No installs for app {}'.format(app_id)) return {} results = { 'all': res['aggregations']['popular']['total_installs']['value'] } if 'region' in res['aggregations']: for regional_res in res['aggregations']['region']['buckets']: region_slug = regional_res['key'] popular = regional_res['popular']['total_installs']['value'] results[region_slug] = popular return results