def get_builds(product, channel, date):
    """Get the buildids for a product/channel prior to date"""
    if channel == "nightly":
        # for nightly, the strategy is pretty simple:
        # - just get builds from a few days before (and update the old ones too)
        ndays = config.get_ndays()
        few_days_ago = date - relativedelta(days=ndays + 5)
        few_days_ago = datetime(few_days_ago.year, few_days_ago.month, few_days_ago.day)
        search_buildid = [
            ">=" + utils.get_buildid(few_days_ago),
            "<=" + utils.get_buildid(date),
        ]
        search_date = ">=" + lmdutils.get_date_str(few_days_ago)
        bids = get_buildids_from_socorro(search_buildid, search_date, product)
    else:
        bids = []
        search_date = ""
        min_date = None
        data = models.Build.get_last_versions(date, channel, product, n=3)
        if data:
            # data are ordered by buildid (desc)
            bids = [x["buildid"] for x in data]
            first_date = utils.get_build_date(bids[-1])
            if min_date is None or min_date > first_date:
                min_date = first_date
        if min_date:
            search_date = ">=" + lmdutils.get_date_str(min_date)

    return bids, search_date
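# Illustration only (helper name and implementation are assumptions, not part of the
# original sources): the buildid bounds built above appear to be plain
# "YYYYMMDDHHMMSS" strings, as the inline comments in
# get_uuids_for_spiking_signatures below suggest (e.g. ">= 20161014000000").
# A minimal sketch of such a formatter:
from datetime import datetime


def buildid_from_date(date):
    """Format a datetime as a Socorro-style buildid string (assumed format)."""
    return date.strftime("%Y%m%d%H%M%S")


# Example: buildid_from_date(datetime(2016, 10, 14)) == "20161014000000", so a range
# like [">=20161014000000", "<=20161018000000"] would select four days of builds.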
def prepare(significants, bugs_by_signature, totals, date):
    if significants:
        today = utils.get_date_ymd(date)
        yesterday = today - relativedelta(days=1)
        yesterday = utils.get_date_str(yesterday)
        tomorrow = today + relativedelta(days=1)
        tomorrow = utils.get_date_str(tomorrow)
        today = utils.get_date_str(today)
        search_date = ['>=' + today, '<' + tomorrow]
        affected_chans = set()
        urls = defaultdict(lambda: dict())
        spikes_number = 0
        results = OrderedDict()

        for product in products:
            if product in significants:
                data1 = significants[product]
                results1 = OrderedDict()
                results[product] = results1
                for chan in channels:
                    if chan in data1:
                        affected_chans.add(chan)
                        params = {
                            'product': product,
                            'date': search_date,
                            'release_channel': chan
                        }
                        params.update(query)
                        url = socorro.SuperSearch.get_link(params)
                        urls[product][chan] = url
                        spikes_number += 1
                        results2 = OrderedDict()
                        results1[chan] = results2
                        sgns = data1[chan]
                        # we order on the 2nd number (today) and the signature
                        for sgn, num in sorted(sgns.items(),
                                               key=lambda p: (p[1][1], p[0]),
                                               reverse=True):
                            bugs = bugs_by_signature.get(sgn, {})
                            results3 = OrderedDict()
                            results2[sgn] = results3
                            results3['numbers'] = num
                            results3['resolved'] = bugs.get('resolved', None)
                            results3['unresolved'] = bugs.get('unresolved', None)

        affected_chans = list(sorted(affected_chans))

        return results, spikes_number, urls, affected_chans, yesterday, today

    return None
def get_extra_for_template(self):
    return {
        'nightly': self.nightly,
        'beta': self.beta,
        'release': self.release,
        'date': lmdutils.get_date_str(self.date),
    }
def update_status_flags(info, update=False, verbose=False):
    status_flags_by_channel = info['status_flags']
    base_versions = info['base_versions']
    start_date_by_channel = info['start_dates']
    end_date = info['end_date']

    for c, d in start_date_by_channel.items():
        start_date_by_channel[c] = utils.get_date_str(d)

    bugs_to_update = {}
    for sgn, i in info['signatures'].items():
        data = generate_bug_report(sgn, i, status_flags_by_channel,
                                   base_versions, start_date_by_channel,
                                   end_date)
        if data:
            bugid = i['bugid']
            bugs_to_update[bugid] = data

    for bugid, data in bugs_to_update.items():
        __warn('Bug %d: %s' % (bugid, str(data)), verbose)
        __warn(data['comment']['body'], verbose)
        if update:
            Bugzilla([str(bugid)]).put(data)

    if update:
        links = '\n'.join(Bugzilla.get_links(list(bugs_to_update.keys())))
        __warn('Bug links: %s' % links, verbose)
def fill_tables():
    engine = db.get_engine(app)
    if not engine.dialect.has_table(engine, 'crashes_bytype'):
        import requests

        logger.info('Generate tables')
        types = {
            'crashes-bytype': Bytype,
            'crashes-categories': Categories,
            'annotations': Annotations
        }
        db.create_all()
        base_url = 'https://crash-analysis.mozilla.com/rkaiser/{}-{}-{}.json'
        for product in magutils.get_products():
            for channel in magutils.get_channels():
                for typ, obj in types.items():
                    url = base_url.format(product, channel, typ)
                    logger.info('Get data from {}'.format(url))
                    response = requests.get(url)
                    logger.info('Status is {}'.format(response.status_code))
                    if response.status_code == 200:
                        data = response.json()
                        obj.populate(product, channel, data)
                        logger.info('DB populated for {}::{}'.format(product, channel))
                        if product == 'Firefox' and \
                           channel == 'release' and \
                           typ == 'crashes-bytype':
                            dates = [magutils.get_date(d) for d in data.keys()]
                            last = max(dates)
                            last = utils.get_date_str(last)
                            Lastdate.put(last)
def get_extra_for_template(self):
    return {
        "nightly": self.nightly,
        "beta": self.beta,
        "release": self.release,
        "date": lmdutils.get_date_str(self.date),
    }
def test_cache(self):
    cache = Cache('test_cache', 7)
    cache.set_dry_run(False)
    bugids = [123, 456, 789]
    cache.add(bugids)

    for bugid in bugids:
        assert bugid in cache
        assert str(bugid) in cache

    assert 101112 not in cache
    assert '101112' not in cache

    with open(cache.get_path(), 'r') as In:
        data = json.load(In)

    for bugid in ['123', '456']:
        date = data[bugid]
        date = lmdutils.get_date_ymd(date) - relativedelta(days=8)
        data[bugid] = lmdutils.get_date_str(date)

    with open(cache.get_path(), 'w') as Out:
        json.dump(data, Out)

    cache = Cache('test_cache', 7)
    cache.set_dry_run(False)

    assert 123 not in cache
    assert 456 not in cache
    assert 789 in cache
def test_cache(self):
    cache = Cache("test_cache", 7)
    cache.set_dry_run(False)
    bugids = [123, 456, 789]
    cache.add(bugids)

    for bugid in bugids:
        assert bugid in cache
        assert str(bugid) in cache

    assert 101112 not in cache
    assert "101112" not in cache

    with open(cache.get_path(), "r") as In:
        data = json.load(In)

    for bugid in ["123", "456"]:
        date = data[bugid]
        date = lmdutils.get_date_ymd(date) - relativedelta(days=8)
        data[bugid] = lmdutils.get_date_str(date)

    with open(cache.get_path(), "w") as Out:
        json.dump(data, Out)

    cache = Cache("test_cache", 7)
    cache.set_dry_run(False)

    assert 123 not in cache
    assert 456 not in cache
    assert 789 in cache
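# Illustrative sketch only (the class below is an assumption, not the project's Cache
# implementation): the tests above suggest a cache that maps bugid -> date-added in a
# JSON file and drops entries older than a given number of days when it is reloaded.
import json
from datetime import datetime, timedelta


class SimpleBugCache:
    def __init__(self, path, max_days):
        self.path = path
        self.max_days = max_days
        try:
            with open(path, 'r') as In:
                data = json.load(In)
        except IOError:
            data = {}
        # keep only the entries that were added recently enough
        limit = datetime.utcnow() - timedelta(days=max_days)
        self.data = {
            bugid: date
            for bugid, date in data.items()
            if datetime.strptime(date, '%Y-%m-%d') >= limit
        }

    def add(self, bugids):
        today = datetime.utcnow().strftime('%Y-%m-%d')
        for bugid in bugids:
            self.data[str(bugid)] = today
        with open(self.path, 'w') as Out:
            json.dump(self.data, Out)

    def __contains__(self, bugid):
        # both int and str lookups are accepted, as in the tests above
        return str(bugid) in self.data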
def get_versions(date, product, channel):
    earliest_mindate = utils.get_date_str(date - datetime.timedelta(days=365))
    all_versions = get_all_versions(product, earliest_mindate)
    delta = datetime.timedelta(weeks=getMaxBuildAge()[channel])
    min_version_date = utils.get_date_ymd(date) - delta
    versions = []
    throttle = 0
    last_versions = []
    last_throttle = 0
    last_date = utils.get_guttenberg_death()

    for v in all_versions:
        if v['product'] == product and v['build_type'] == channel:
            sd = utils.get_date_ymd(v['start_date'])
            if sd <= date:
                if sd > last_date:
                    last_date = sd
                    last_versions = [v['version']]
                    last_throttle = 100. / float(v['throttle'])
                if sd > min_version_date:
                    versions.append(v['version'])
                    if throttle == 0:
                        throttle = 100. / float(v['throttle'])

    if not versions:
        versions = last_versions
        throttle = last_throttle

    return versions, throttle
def get(product, channel, date):
    date = magutils.get_date(date)
    if date:
        bytype = db.session.query(Bytype).filter_by(product=product,
                                                    channel=channel,
                                                    date=date)
    else:
        bytype = db.session.query(Bytype).filter_by(product=product,
                                                    channel=channel)
    r = {}
    for bt in bytype:
        date = utils.get_date_str(bt.date)
        r[date] = {
            'adi': bt.adi,
            'crashes': {
                'Content': bt.content,
                'OOP Plugin': bt.oop_plugin,
                'Hang Plugin': bt.hang_plugin,
                'Browser': bt.browser,
                'Gpu': bt.gpu
            },
            'versions': bt.versions.split('|')
        }

    return r
def get_stats_for_past_weeks(product, channel, start_date_by_channel,
                             versions_by_channel, analysis, search_start_date,
                             end_date, check_for_fx=True):
    queries = []
    trends = {}
    signatures_by_chan = {}
    default_trend_by_chan = {}
    ref_monday, _ = utils.get_monday_sunday(utils.get_date_ymd(end_date))

    def get_past_week(date):
        monday, _ = utils.get_monday_sunday(date)
        return (ref_monday - monday).days // 7

    for chan in channel:
        past_w = get_past_week(start_date_by_channel[chan])
        default_trend_by_chan[chan] = {i: 0 for i in range(past_w + 1)}

    for signature, info in analysis.items():
        if not check_for_fx or info['firefox']:
            data = {}
            trends[signature] = data
            # for chan, volume in info['affected']:
            for chan in channel:
                if chan in signatures_by_chan:
                    signatures_by_chan[chan].append(signature)
                else:
                    signatures_by_chan[chan] = [signature]
                data[chan] = default_trend_by_chan[chan].copy()

    def handler_ss(chan, json, data):
        sgns = []
        for facets in json['facets']['histogram_date']:
            d = utils.get_date_ymd(facets['term'])
            w = get_past_week(d)
            s = facets['facets']['signature']
            for signature in s:
                count = signature['count']
                sgn = signature['term']
                sgns.append(sgn)
                data[sgn][chan][w] += count

    for chan, signatures in signatures_by_chan.items():
        if search_start_date:
            search_date = socorro.SuperSearch.get_search_date(search_start_date, end_date)
        else:
            search_date = socorro.SuperSearch.get_search_date(
                utils.get_date_str(start_date_by_channel[chan]), end_date)
        vers = versions_by_channel[chan]
        for sgns in Connection.chunks(signatures, 10):
            queries.append(Query(socorro.SuperSearch.URL,
                                 {'signature': ['=' + s for s in sgns],
                                  'product': product,
                                  'version': vers,
                                  'release_channel': chan,
                                  'date': search_date,
                                  '_histogram.date': 'signature',
                                  '_results_number': 0},
                                 handler=functools.partial(handler_ss, chan),
                                 handlerdata=trends))

    socorro.SuperSearch(queries=queries).wait()

    return trends
def get_search_date(search_start_date, start_date, end_date=utils.get_date('today')):
    if search_start_date:
        return socorro.SuperSearch.get_search_date(search_start_date, end_date)
    else:
        return socorro.SuperSearch.get_search_date(
            utils.get_date_str(start_date), end_date)
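# Assumption for illustration (a sketch, not libmozdata's actual implementation):
# SuperSearch.get_search_date apparently turns a start/end pair into the list of
# range operators that Socorro's `date` parameter expects, e.g.
# ['>=2017-01-01', '<2017-01-31'], which matches how search_date is built by hand
# in prepare() and get_params_for_link() elsewhere in this collection.
def make_search_date(start, end):
    """Build a Socorro-style date range from two 'YYYY-MM-DD' strings."""
    return ['>=' + start, '<' + end]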
def handler_rev(json, data):
    push = json["pushdate"][0]
    push = datetime.datetime.utcfromtimestamp(push)
    push = lmdutils.as_utc(push)
    data["date"] = lmdutils.get_date_str(push)
    data["backedout"] = utils.is_backout(json)
    m = BUG_PAT.search(json["desc"])
    if not m or m.group(1) != data["bugid"]:
        data["bugid"] = ""
def handler_rev(json, data):
    push = json['pushdate'][0]
    push = datetime.datetime.utcfromtimestamp(push)
    push = lmdutils.as_utc(push)
    data['date'] = lmdutils.get_date_str(push)
    data['backedout'] = utils.is_backout(json)
    m = BUG_PAT.search(json['desc'])
    if not m or m.group(1) != data['bugid']:
        data['bugid'] = ''
def get_info_from_hg(json):
    res = {}
    push = json["pushdate"][0]
    push = datetime.datetime.utcfromtimestamp(push)
    push = lmdutils.as_utc(push)
    res["date"] = lmdutils.get_date_str(push)
    res["backedout"] = json.get("backedoutby", "") != ""
    m = BUG_PAT.search(json["desc"])
    res["bugid"] = m.group(1) if m else ""

    return res
def tocsv(filename, channel, versions=None, product='Firefox',
          start_date=None, end_date='today', duration=30, platforms=None):
    with open(filename, 'w') as Out:
        writer = csv.writer(Out, delimiter=',')
        data = get(channel, versions, product, start_date,
                   end_date, duration, platforms)
        data = [(utils.get_date_str(d), data[d]) for d in sorted(data)]
        head = ['date', 'adi',
                'browser', 'content', 'b+c', 'plugin',
                'browser_rate', 'content_rate', 'b+c_rate', 'plugin_rate']
        writer.writerow(head)
        for d in data:
            row = [d[0],
                   d[1]['adi'],
                   d[1]['browser'],
                   d[1]['content'],
                   d[1]['b+c'],
                   d[1]['plugin'],
                   d[1]['browser_rate'],
                   d[1]['content_rate'],
                   d[1]['b+c_rate'],
                   d[1]['plugin_rate']]
            writer.writerow(row)
def get_info_from_hg(json):
    res = {}
    push = json['pushdate'][0]
    push = datetime.datetime.utcfromtimestamp(push)
    push = lmdutils.as_utc(push)
    res['date'] = lmdutils.get_date_str(push)
    res['backedout'] = json.get('backedoutby', '') != ''
    m = BUG_PAT.search(json['desc'])
    res['bugid'] = m.group(1) if m else ''

    return res
def get_filename(date, output_dir):
    try:
        if date:
            date = utils.get_date_str(utils.get_date_ymd(date))
        else:
            dates = getdates(output_dir)
            if dates['dates']:
                date = dates['dates'][-1]
            else:
                return None
        return os.path.join(output_dir, date + '.json')
    except:
        return None
def get_params_for_link(date, query={}):
    today = utils.get_date_ymd(date)
    tomorrow = today + relativedelta(days=1)
    tomorrow = utils.get_date_str(tomorrow)
    today = utils.get_date_str(today)
    search_date = ['>=' + today, '<' + tomorrow]
    params = {'product': '',
              'date': search_date,
              'release_channel': '',
              'signature': '',
              '_facets': ['url',
                          'user_comments',
                          'install_time',
                          'version',
                          'address',
                          'moz_crash_reason',
                          'reason',
                          'build_id',
                          'platform_pretty_version',
                          'signature',
                          'useragent_locale']}
    params.update(query)

    return params
def get_browser_startup(product, channel, date):
    date = magutils.get_date(date)
    if date:
        cats = db.session.query(Categories).filter_by(
            product=product,
            channel=channel,
            kind='startup',
            date=date,
        )
    else:
        cats = db.session.query(Categories).filter_by(product=product,
                                                      channel=channel,
                                                      kind='startup')
    r = {}
    for cat in cats:
        date = utils.get_date_str(cat.date)
        r[date] = cat.browser

    return dict(r)
def get_changeset(buildid, channel, product):
    """Trick to get changeset for a particular buildid/channel/product"""
    search_date = ">=" + lmdutils.get_date_str(buildid)
    buildid = utils.get_buildid(buildid)
    logger.info("Get changeset for {}-{}-{}.".format(buildid, product, channel))

    def handler(json, data):
        pat = re.compile(r"^.*:([0-9a-f]+)$")
        if not json["facets"]["build_id"]:
            return
        for facets in json["facets"]["build_id"]:
            for tf in facets["facets"]["topmost_filenames"]:
                m = pat.match(tf["term"])
                if m:
                    chgset = m.group(1)
                    count = tf["count"]
                    data[chgset] += count

    params = {
        "product": product,
        "release_channel": channel,
        "build_id": buildid,
        "date": search_date,
        "topmost_filenames": '@"hg:hg.mozilla.org/".*:[0-9a-f]+',
        "_aggs.build_id": "topmost_filenames",
        "_results_number": 0,
        "_facets": "product",
        "_facets_size": 100,
    }

    data = defaultdict(lambda: 0)
    socorro.SuperSearch(params=params, handler=handler, handlerdata=data).wait()

    chgset = None
    if data:
        chgset, _ = max(data.items(), key=lambda p: p[1])
        chgset = utils.short_rev(chgset)

    logger.info("Get changeset: finished.")

    return chgset
def get_changeset(buildid, channel, product):
    """Trick to get changeset for a particular buildid/channel/product"""
    search_date = '>=' + lmdutils.get_date_str(buildid)
    buildid = utils.get_buildid(buildid)
    logger.info('Get changeset for {}-{}-{}.'.format(buildid, product, channel))

    def handler(json, data):
        pat = re.compile(r'^.*:([0-9a-f]+)$')
        if not json['facets']['build_id']:
            return
        for facets in json['facets']['build_id']:
            for tf in facets['facets']['topmost_filenames']:
                m = pat.match(tf['term'])
                if m:
                    chgset = m.group(1)
                    count = tf['count']
                    data[chgset] += count

    params = {
        'product': product,
        'release_channel': channel,
        'build_id': buildid,
        'date': search_date,
        'topmost_filenames': '@\"hg:hg.mozilla.org/\".*:[0-9a-f]+',
        '_aggs.build_id': 'topmost_filenames',
        '_results_number': 0,
        '_facets': 'product',
        '_facets_size': 100
    }

    data = defaultdict(lambda: 0)
    socorro.SuperSearch(params=params, handler=handler, handlerdata=data).wait()

    chgset = None
    if data:
        chgset, _ = max(data.items(), key=lambda p: p[1])
        chgset = utils.short_rev(chgset)

    logger.info('Get changeset: finished.')

    return chgset
def get(product, channel, date):
    date = magutils.get_date(date)
    if date:
        cats = db.session.query(Categories).filter_by(product=product,
                                                      channel=channel,
                                                      date=date)
    else:
        cats = db.session.query(Categories).filter_by(product=product,
                                                      channel=channel)
    r = defaultdict(lambda: dict())
    for cat in cats:
        kind = cat.kind
        date = utils.get_date_str(cat.date)
        if kind == 'shutdownhang':
            r[date]['shutdownhang'] = cat.browser
        else:
            r[date][kind] = {
                'content': cat.content,
                'browser': cat.browser,
                'plugin': cat.plugin
            }

    return dict(r)
def get_params_for_link(query={}):
    today = utils.get_date_ymd('today')
    last = today - relativedelta(days=config.get_limit())
    last = utils.get_date_str(last)
    search_date = ['>=' + last]
    params = {
        'product': '',
        'date': search_date,
        'release_channel': '',
        'version': '',
        'signature': '',
        '_facets': [
            'url',
            'user_comments',
            'install_time',
            'version',
            'address',
            'moz_crash_reason',
            'reason',
            'build_id',
            'platform_pretty_version',
            'signature',
            'useragent_locale'
        ]
    }
    params.update(query)

    return params
def generate_bug_report(sgn, info, status_flags_by_channel, base_versions,
                        start_date_by_channel, end_date, check_for_fx=True):
    data = {}
    if not check_for_fx or info['firefox']:
        volumes = default_volumes.copy()
        data = {}
        for channel, volume in info['affected']:
            data[status_flags_by_channel[channel]] = 'affected'
            volumes[channel] = volume
        for channel, volume in info['leftovers']:
            volumes[channel] = volume

        # We begin with the crash volume
        comment = 'Crash volume for signature \'%s\':\n' % sgn
        table = []
        for chan, volume in sorted(volumes.items(),
                                   key=lambda k: channel_order[k[0]]):
            version = base_versions[chan]
            start_date = start_date_by_channel[chan]
            plural = 'es' if volume != 1 else ''
            table.append(['- %s' % chan,
                          '(version %d):' % version,
                          '%d crash%s from %s.' % (volume, plural,
                                                   utils.get_date_str(start_date))])
        comment += __mk_volume_table(table, 'global')

        # Make the table for the trend
        table = []
        empty = False
        N = -1
        for chan, trend in sorted(info['trend'].items(),
                                  key=lambda k: channel_order[k[0]]):
            if len(trend) >= 1:
                # we remove data for this week
                del(trend[0])
            if len(trend) >= 8:
                # keep only the last seven weeks
                trend = trend[:7]
            if not trend:
                empty = True
                break
            N = max(N, len(trend))
            row = [str(n) for n in trend]
            row.insert(0, '- %s' % chan)
            table.append(row)

        if not empty:
            # we've trends
            monday, sunday = utils.get_monday_sunday(utils.get_date_ymd(end_date))
            comment += '\n\nCrash volume on the last weeks (Week N is from %s to %s):\n' % (monday.strftime('%m-%d'), sunday.strftime('%m-%d'))
            headers = ['']
            for w in range(1, N + 1):
                headers.append('W. N-%d' % w)
            comment += __mk_volume_table(table, 'byweek', headers=headers)

        # Add affected platforms
        platforms = info['platforms']
        if platforms:
            comment += '\n\nAffected platform'
            if len(platforms) >= 2:
                comment += 's'
            platforms = sorted(platforms, key=lambda k: platform_order[k])
            comment += ': ' + ', '.join(platforms)

        ranks = info['rank']
        if ranks:
            # check if we've ranks
            empty = True
            for types in ranks.values():
                for v in types.values():
                    if v != -1:
                        empty = False
                        break

            if not empty:
                comment += '\n\nCrash rank on the last 7 days:\n'
                headers = ['', 'Browser', 'Content', 'Plugin']
                table = []

                def fmt_rank(s):
                    return None if s == -1 else '#' + str(s)

                for chan, types in sorted(ranks.items(),
                                          key=lambda k: channel_order[k[0]]):
                    table.append(['- %s' % chan,
                                  fmt_rank(types['browser']),
                                  fmt_rank(types['content']),
                                  fmt_rank(types['plugin'])])
                comment += __mk_volume_table(table, 'rank', headers=headers)

        data['comment'] = {'body': comment}

    return data
def get(channel, date, versions=None, product='Firefox', duration=1):
    """Get stability info

    Args:
        channel (str): the channel
        date (str): the final date
        versions (Optional[List[str]]): the versions to treat
        product (Optional[str]): the product
        duration (Optional[int]): the duration to retrieve the data

    Returns:
        dict: contains all the info relative to stability
    """
    channel = channel.lower()
    cycle = duration <= 0

    versions_info = socorro.ProductVersions.get_version_info(versions, channel=channel, product=product)

    versions = versions_info.keys()
    throttle = set(map(lambda p: p[1], versions_info.values()))
    diff_throttle = len(throttle) != 1
    # normally the throttle is 10% for release and 100% for the other channels
    if not diff_throttle:
        throttle = throttle.pop()

    platforms = socorro.Platforms.get_cached_all()

    end_date_dt = utils.get_date_ymd(date)
    if cycle:
        # we get all the start dates for each version and take the min
        start_date_dt = min(map(lambda p: utils.get_date_ymd(p[0]), versions_info.values()))
        duration = (end_date_dt - start_date_dt).days + 1
    else:
        start_date_dt = end_date_dt - timedelta(duration - 1)
    start_date_str = utils.get_date_str(start_date_dt)
    end_date_str = utils.get_date_str(end_date_dt)

    # First, we get the ADI
    adi = socorro.ADI.get(version=versions, product=product,
                          end_date=end_date_str, duration=duration,
                          platforms=platforms)
    adi = [adi[key] for key in sorted(adi.keys(), reverse=False)]

    # Get the khours
    khours = Redash.get_khours(start_date_dt, end_date_dt,
                               channel, versions, product)
    khours = [khours[key] for key in sorted(khours.keys(), reverse=False)]

    # Get the # of crashes (crash pings)
    crash_pings = Redash.get_number_of_crash(start_date_dt, end_date_dt,
                                             channel, versions, product)

    crashes = {}
    stats = {'m+c': 0., 'main': 0., 'content': 0., 'plugin': 0., 'all': 0.}
    for i in range(duration):
        d = end_date_dt - timedelta(i)
        crashes[d] = {}
        crashes[d]['socorro'] = {'global': stats.copy(), 'startup': stats.copy()}
        crashes[d]['telemetry'] = crash_pings[d]

    base = {'product': product,
            'version': None,
            'date': socorro.SuperSearch.get_search_date(start_date_str, end_date_str),
            'release_channel': channel,
            '_results_number': 1,
            '_histogram.date': ['product', 'process_type'],
            '_facets_size': 3}

    if diff_throttle:
        # in this case each version could have a different throttle,
        # so we need to compute stats for each version
        queries = []
        for v, t in versions_info.items():
            cparams = base.copy()
            cparams['version'] = v
            queries.append(Query(socorro.SuperSearch.URL, cparams,
                                 functools.partial(__crash_handler, t[1]), crashes))
            cparams = copy.deepcopy(cparams)
            cparams['uptime'] = '<60'
            cparams['_histogram.date'].append('uptime')
            queries.append(Query(socorro.SuperSearch.URL, cparams,
                                 functools.partial(__crash_handler, t[1]), crashes))
    else:
        base['version'] = versions
        queries = []
        queries.append(Query(socorro.SuperSearch.URL, base,
                             functools.partial(__crash_handler, throttle), crashes))
        cparams = copy.deepcopy(base)
        cparams['uptime'] = '<60'
        cparams['_histogram.date'].append('uptime')
        queries.append(Query(socorro.SuperSearch.URL, cparams,
                             functools.partial(__crash_handler, throttle), crashes))

    socorro.SuperSearch(queries=queries).wait()

    crashes = [crashes[key] for key in sorted(crashes.keys(), reverse=False)]

    # Now we compute the rates and the averages
    stats = {'m+c': [0., 0., 0., 0.],
             'main': [0., 0., 0., 0.],
             'content': [0., 0., 0., 0.],
             'plugin': [0., 0., 0., 0.],
             'all': [0., 0., 0., 0.]}
    averages = {}
    averages['socorro'] = {'global': stats, 'startup': copy.deepcopy(stats)}
    averages['telemetry'] = copy.deepcopy(stats)

    N = len(adi)
    # sum
    for i in range(N):
        crash_soc = crashes[i]['socorro']
        for k1, v1 in averages['socorro'].items():
            for k2, av in v1.items():
                c = crash_soc[k1][k2]
                # the rate is computed for 100 adi
                x = utils.rate(100. * c, adi[i])
                av[0] += x
                av[1] += x ** 2
                y = utils.rate(c, khours[i])
                av[2] += y
                av[3] += y ** 2
                crash_soc[k1][k2] = (c, x, y)
        crash_tel = crashes[i]['telemetry']
        for k1, av in averages['telemetry'].items():
            c = crash_tel[k1]
            # the rate is computed for 100 adi
            x = utils.rate(100. * c, adi[i])
            av[0] += x
            av[1] += x ** 2
            y = utils.rate(c, khours[i])
            av[2] += y
            av[3] += y ** 2
            crash_tel[k1] = (c, x, y)

    N = float(N)
    averages_old = {'socorro': {}, 'telemetry': {}}
    averages_new = copy.deepcopy(averages_old)

    # mean & standard deviation
    av_new_soc = averages_new['socorro']
    av_old_soc = averages_old['socorro']
    for k1, v1 in averages['socorro'].items():
        d1 = {}
        av_old_soc[k1] = d1
        d2 = {}
        av_new_soc[k1] = d2
        for k2, av in v1.items():
            m = av[0] / N
            d1[k2] = (m, math.sqrt(av[1] / N - m ** 2))
            m = av[2] / N
            d2[k2] = (m, math.sqrt(av[3] / N - m ** 2))

    av_new_tel = averages_new['telemetry']
    av_old_tel = averages_old['telemetry']
    for k1, av in averages['telemetry'].items():
        m = av[0] / N
        av_old_tel[k1] = (m, math.sqrt(av[1] / N - m ** 2))
        m = av[2] / N
        av_new_tel[k1] = (m, math.sqrt(av[3] / N - m ** 2))

    return {'start_date': start_date_str,
            'end_date': end_date_str,
            'versions': versions,
            'adi': adi,
            'khours': khours,
            'crashes': crashes,
            'averages_old': averages_old,
            'averages_new': averages_new}
def monitor(emails=[], date='yesterday', path='', data=None,
            verbose=False, writejson=False):
    if not data:
        try:
            with open(path, 'r') as In:
                data = json.load(In)
        except IOError:
            data = {p: {c: {} for c in channels} for p in products}

    searches = []
    start_date = utils.get_date_ymd(date)
    end_date = start_date + datetime.timedelta(days=1)
    search_date = socorro.SuperSearch.get_search_date(start_date, end_date)
    all_versions = {}
    versions_pc = defaultdict(lambda: defaultdict(lambda: []))

    def handler_ss(date, product, json, data):
        if not json['errors']:
            for info in json['facets']['release_channel']:
                chan = info['term']
                total = info['count']
                if product == 'FennecAndroid':
                    d = {'total': total}
                    if chan in data:
                        data[chan][date] = d
                    else:
                        data[chan] = {date: d}
                else:
                    throttle = 10 if chan == 'release' else 1
                    total *= throttle
                    d = {'browser': 0, 'plugin': 0, 'content': 0, 'total': total}
                    for pt in info['facets']['process_type']:
                        term = pt['term']
                        count = pt['count']
                        if term in d:  # term can be 'gpu' (very rare)
                            d[term] += count * throttle
                    d['browser'] = total - (d['plugin'] + d['content'])
                    if chan in data:
                        data[chan][date] = d
                    else:
                        data[chan] = {date: d}

    if date == 'today':
        dates = ['yesterday', 'today']
    else:
        dates = [date]

    delay_by_channel = {
        'release': config.get('MonitorStartupCrashes', 'delay_release', 12, type=int),
        'beta': config.get('MonitorStartupCrashes', 'delay_beta', 4, type=int),
        'aurora': config.get('MonitorStartupCrashes', 'delay_aurora', 9, type=int),
        'nightly': config.get('MonitorStartupCrashes', 'delay_nightly', 9, type=int)
    }

    for data_date in dates:
        data_date = utils.get_date_ymd(data_date)
        next_data_date = data_date + datetime.timedelta(days=1)
        search_data_date = socorro.SuperSearch.get_search_date(data_date, next_data_date)
        for product in products:
            versions = socorro.ProductVersions.get_all_versions(product)
            all_versions[product] = []
            for chan in channels:
                info = versions[chan]
                last_ver_major = max(info.keys())
                _start_date = data_date - datetime.timedelta(weeks=delay_by_channel[chan])
                for major in range(last_ver_major, last_ver_major - 4, -1):
                    for v, d in info[major]['versions'].items():
                        if not v.endswith('b') and _start_date <= d <= data_date:
                            all_versions[product].append(v)
                            versions_pc[product][chan].append(v)
            searches.append(
                socorro.SuperSearch(params={'product': product,
                                            'date': search_data_date,
                                            'release_channel': channels,
                                            'version': all_versions[product],
                                            'uptime': '<60',
                                            '_results_number': 0,
                                            '_facets_size': 100,
                                            '_aggs.release_channel': 'process_type'},
                                    handler=functools.partial(handler_ss,
                                                              utils.get_date_str(data_date),
                                                              product),
                                    handlerdata=data[product]))

    for s in searches:
        s.wait()

    if writejson and path:
        with open(path, 'w') as Out:
            json.dump(data, Out, sort_keys=True)

    new_start_date = start_date - datetime.timedelta(days=1)
    new_search_date = socorro.SuperSearch.get_search_date(new_start_date, end_date)

    def handler_ss_spikers(json, data):
        if not json['errors']:
            for facets in json['facets']['histogram_date']:
                date = utils.get_date_ymd(facets['term'])
                s = facets['facets']['signature']
                d = {}
                data[date] = d
                for signature in s:
                    count = signature['count']
                    sgn = signature['term']
                    d[sgn] = count

    spikers_info = defaultdict(lambda: defaultdict(lambda: dict()))
    searches = []
    for product, i1 in data.items():
        for chan, i2 in i1.items():
            _data = [float(i[1]['total'])
                     for i in sorted(i2.items(), key=lambda p: utils.get_date_ymd(p[0]))
                     if utils.get_date_ymd(i[0]) <= start_date]
            # print(product, chan)
            issp = spikeanalysis.is_spiking_ma(_data, alpha=2.5, win=7,
                                               method='mean', plot=False) == 'up'
            # spikeanalysis.get_spikes_ma(_data, alpha=2.5, win=7, method='mean', plot=True)
            if issp:
                # spikeanalysis.is_spiking_ma(_data, alpha=2.5, win=7, method='mean', plot=True)
                searches.append(
                    socorro.SuperSearch(params={'product': product,
                                                'date': new_search_date,
                                                'release_channel': chan,
                                                'version': all_versions[product],
                                                'uptime': '<60',
                                                '_results_number': 0,
                                                '_histogram.date': 'signature',
                                                '_facets_size': 100},
                                        handler=handler_ss_spikers,
                                        handlerdata=spikers_info[product][chan]))

    for s in searches:
        s.wait()

    if spikers_info:
        # So we've some spikes... need to send an email with all the info
        searches = []
        interesting_sgns = get_most_signifiant_increases(spikers_info)
        bugs_by_signature = get_bugs(interesting_sgns)
        affected_chans = set()
        crash_data = {p: {} for p in spikers_info.keys()}
        spikes_number = 0

        def handler_global(product, json, data):
            if not json['errors']:
                for info in json['facets']['release_channel']:
                    chan = info['term']
                    throttle = 10 if product == 'FennecAndroid' and chan == 'release' else 1
                    total = info['count'] * throttle
                    data[chan] = total

        for p, i1 in spikers_info.items():
            searches.append(
                socorro.SuperSearch(params={'product': p,
                                            'date': search_date,
                                            'release_channel': list(i1.keys()),
                                            'version': all_versions[p],
                                            '_results_number': 0,
                                            '_facets_size': 5,
                                            '_aggs.release_channel': 'signature'},
                                    handler=functools.partial(handler_global, p),
                                    handlerdata=crash_data[p]))
            for c in i1.keys():
                spikes_number += 1
                affected_chans.add(c)
                url = socorro.SuperSearch.get_link({'product': p,
                                                    'date': search_date,
                                                    'release_channel': c,
                                                    'version': versions_pc[p][c],
                                                    'uptime': '<60'})
                sgns_chan = interesting_sgns[p]
                sgns_stats = [(s, bugs_by_signature[s], t[2], t[1], t[3])
                              for s, t in sorted(sgns_chan[c].items(),
                                                 key=lambda p: p[1][0],
                                                 reverse=True)]
                sgns_chan[c] = (url, sgns_stats)

        for s in searches:
            s.wait()

        env = Environment(loader=FileSystemLoader('templates'))
        env.filters['inflect'] = inflect
        template = env.get_template('startup_crashes_email')
        _is = OrderedDict()
        for product in sorted(interesting_sgns.keys()):
            _is[product] = OrderedDict()
            for chan in sorted(interesting_sgns[product].keys()):
                _is[product][chan] = interesting_sgns[product][chan]
        interesting_sgns = _is
        body = template.render(spikes_number=spikes_number,
                               spikes_number_word=inflect.engine().number_to_words(spikes_number),
                               crash_data=crash_data,
                               start_date=utils.get_date_str(new_start_date),
                               end_date=utils.get_date_str(start_date),
                               interesting_sgns=interesting_sgns)

        title = 'Spikes in startup crashes in %s' % ', '.join(affected_chans)
        if emails:
            gmail.send(emails, title, body, html=True)
        if verbose:
            print('Title: %s' % title)
            print('Body:')
            print(body)

        return {'title': title, 'body': body}

    return None
def test_get_date_str(self):
    date = '1991-04-16'
    self.assertEqual(
        utils.get_date_str(datetime.datetime.strptime(date, '%Y-%m-%d')),
        date)
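# Illustrative only: the test above pins down the contract of get_date_str
# (a datetime in, the 'YYYY-MM-DD' string out). A minimal sketch assuming nothing
# more than that contract:
import datetime


def get_date_str(dt):
    """Return the date part of a datetime as a 'YYYY-MM-DD' string."""
    return dt.strftime('%Y-%m-%d')


# Example: get_date_str(datetime.datetime(1991, 4, 16)) == '1991-04-16'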
def reformat_data(data):
    _data = {}
    for k, v in data.items():
        _data[utils.get_date_str(k)] = v

    return _data
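# Hypothetical usage (the input values below are made up): reformat_data simply
# re-keys a dict from datetime objects to their 'YYYY-MM-DD' strings.
#
#   >>> from datetime import datetime
#   >>> reformat_data({datetime(2017, 3, 1): 42, datetime(2017, 3, 2): 7})
#   {'2017-03-01': 42, '2017-03-02': 7}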
def get_uuids_for_spiking_signatures(channel, cache=None, product='Firefox',
                                     date='today', limit=10000, max_days=3,
                                     threshold=5):
    psttz = pytz.timezone('US/Pacific')
    end_date = utils.get_date_ymd(date)  # 2016-10-18 UTC
    end_date_moz = psttz.localize(datetime(end_date.year, end_date.month, end_date.day))  # 2016-10-18 PST
    end_buildid = utils.get_buildid_from_date(end_date_moz)  # < 20161018000000
    start_date_moz = end_date_moz - timedelta(days=max_days + 1)  # 2016-10-14 PST (max_days == 3)
    start_buildid = utils.get_buildid_from_date(start_date_moz)  # >= 20161014000000
    search_buildid = ['>=' + start_buildid, '<' + end_buildid]
    start_date = utils.as_utc(start_date_moz)  # 2016-10-14 07:00:00 UTC
    search_date = '>=' + utils.get_date_str(start_date)
    data = defaultdict(lambda: defaultdict(lambda: 0))
    buildids = {}

    def handler(json, data):
        if not json['errors']:
            for facets in json['facets']['build_id']:
                date = utils.get_date_from_buildid(facets['term']).astimezone(psttz)
                buildids[date] = facets['count']
                for s in facets['facets']['signature']:
                    sgn = s['term']
                    count = s['count']
                    data[sgn][date] += count

    socorro.SuperSearch(params={'product': product,
                                'date': search_date,
                                'build_id': search_buildid,
                                'release_channel': channel,
                                '_aggs.build_id': 'signature',
                                '_facets_size': limit,
                                '_results_number': 0},
                        handler=handler, handlerdata=data).wait()

    _data = {}
    base = {start_date_moz + timedelta(days=i): {'buildids': {}, 'total': 0}
            for i in range(max_days + 1)}  # from 2016-10-14 to 2016-10-17 PST
    for sgn, info in data.items():
        d = copy.deepcopy(base)
        _data[sgn] = d
        for bid, count in info.items():
            date = psttz.localize(datetime(bid.year, bid.month, bid.day))
            d[date]['buildids'][bid] = count
            d[date]['total'] += count
    data = _data

    spiking_signatures = []
    for sgn, info in data.items():
        stats2 = [i['total'] for _, i in sorted(info.items(), key=lambda p: p[0])]
        if all(i == 0 for i in stats2[:-1]) and stats2[-1] >= threshold:
            spiking_signatures.append(sgn)

    data = None
    if spiking_signatures:
        # sort the signatures to be sure to always have the same order for the test
        spiking_signatures = sorted(spiking_signatures)
        start_buildid = utils.get_buildid_from_date(end_date_moz - timedelta(days=1))
        search_buildid = ['>=' + start_buildid, '<' + end_buildid]
        queries = []
        data = defaultdict(lambda: list())

        def handler(json, data):
            if not json['errors']:
                for facets in json['facets']['proto_signature']:
                    proto = facets['term']
                    count = facets['count']
                    facets = facets['facets']
                    sgn = facets['signature'][0]['term']
                    first_uuid = facets['uuid'][0]['term']
                    uuids = {i['term'] for i in facets['uuid']}
                    if cache:
                        i = uuids.intersection(cache['uuids'])
                        uuid = i.pop() if i else first_uuid
                    else:
                        uuid = first_uuid
                    data[sgn].append({'proto': proto, 'uuid': uuid, 'count': count})

        for sgns in Connection.chunks(spiking_signatures, 5):
            queries.append(Query(socorro.SuperSearch.URL,
                                 {'product': product,
                                  'date': search_date,
                                  'build_id': search_buildid,
                                  'signature': ['=' + s for s in sgns],
                                  'release_channel': channel,
                                  '_aggs.proto_signature': ['uuid', 'signature'],
                                  '_facets_size': 10000,
                                  '_results_number': 0},
                                 handler=handler, handlerdata=data))
        socorro.SuperSearch(queries=queries).wait()

    return data
def get(channel, date, product='Firefox', duration=11, tc_limit=50,
        crash_type='all', startup=False):
    """Get crashes info

    Args:
        channel (str): the channel
        date (str): the final date
        product (Optional[str]): the product
        duration (Optional[int]): the duration to retrieve the data
        tc_limit (Optional[int]): the number of topcrashes to load
        crash_type (Optional[str]): 'all' (default) or 'browser' or 'content' or 'plugin'
        startup (Optional[bool]): if True, restrict the search to startup crashes

    Returns:
        dict: contains all the info relative to the crashes
    """
    channel = channel.lower()
    version = v[channel]

    sys.stdout.write('Getting version information from Socorro...')
    sys.stdout.flush()
    versions = versions_util.get_channel_versions(channel, product)
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    if crash_type and isinstance(crash_type, six.string_types):
        crash_type = [crash_type]

    _date = utils.get_date_ymd(date)
    start_date = utils.get_date_str(_date - timedelta(duration - 1))
    end_date = utils.get_date_str(_date)

    signatures = {}

    def signature_handler(json):
        for signature in json['facets']['signature']:
            signatures[signature['term']] = [signature['count'], 0, 0, 0, 0, 0]

            for platform in signature['facets']['platform']:
                if platform['term'] == 'Linux':
                    signatures[signature['term']][3] = platform['count']
                elif platform['term'] == 'Windows NT':
                    signatures[signature['term']][1] = platform['count']
                elif platform['term'] == 'Mac OS X':
                    signatures[signature['term']][2] = platform['count']

            for startup_crash in signature['facets']['startup_crash']:
                if startup_crash['term'] in ['1', 'T']:
                    signatures[signature['term']][4] += startup_crash['count']

            signatures[signature['term']][5] = signature['facets']['cardinality_install_time']['value']

    params = {
        'product': product,
        'version': versions,
        'date': socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel': channel,
        '_aggs.signature': ['platform', '_cardinality.install_time', 'startup_crash'],
        '_results_number': 0,
        '_facets_size': tc_limit,
    }

    if startup:
        params['startup_crash'] = True

    sys.stdout.write('Getting top signatures from Socorro...')
    sys.stdout.flush()
    socorro.SuperSearch(params=params, handler=signature_handler).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    bug_flags = ['resolution', 'id', 'last_change_time',
                 'cf_tracking_firefox' + str(version)]
    for i in range(int(version), int(v['nightly']) + 1):
        bug_flags.append('cf_status_firefox' + str(i))

    # TODO: too many requests... should be improved with chunks
    bugs = {}
    # TODO: Use regexp, when the Bugzilla bug that prevents them from working will be fixed.
    base = {
        'j_top': 'OR',
        'o1': 'substring',
        'f1': 'cf_crash_signature',
        'v1': None,
        'o2': 'substring',
        'f2': 'cf_crash_signature',
        'v2': None,
        'o3': 'substring',
        'f3': 'cf_crash_signature',
        'v3': None,
        'o4': 'substring',
        'f4': 'cf_crash_signature',
        'v4': None,
        'include_fields': bug_flags
    }

    queries = []
    for sgn in signatures.keys():
        cparams = base.copy()
        cparams['v1'] = '[@' + sgn + ']'
        cparams['v2'] = '[@ ' + sgn + ' ]'
        cparams['v3'] = '[@ ' + sgn + ']'
        cparams['v4'] = '[@' + sgn + ' ]'
        bugs[sgn] = []
        queries.append(Query(Bugzilla.API_URL, cparams, __bug_handler, bugs[sgn]))
    res_bugs = Bugzilla(queries=queries)

    # we have stats by signature in self.signatures
    # for each signature get the number of crashes on the last X days
    # so get the signature trend
    trends = {}
    default_trend = {}
    for i in range(duration):
        default_trend[_date - timedelta(i)] = 0

    base = {
        'product': product,
        'version': versions,
        'signature': None,
        'date': socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel': channel,
        '_results_number': 0,
        '_histogram.date': ['signature'],
        '_histogram_interval': 1
    }

    queries = []
    for sgns in Connection.chunks(list(map(lambda sgn: '=' + sgn, signatures.keys())), 10):
        sgn_group = []
        for sgn in sgns:
            if sum(len(s) for s in sgn_group) >= 1000:
                cparams = base.copy()
                cparams['signature'] = sgn_group
                queries.append(Query(socorro.SuperSearch.URL, cparams,
                                     functools.partial(__trend_handler, default_trend),
                                     trends))
                sgn_group = []
            sgn_group.append(sgn)

        if len(sgn_group) > 0:
            cparams = base.copy()
            cparams['signature'] = sgn_group
            queries.append(Query(socorro.SuperSearch.URL, cparams,
                                 functools.partial(__trend_handler, default_trend),
                                 trends))

    sys.stdout.write('Getting trends for top signatures from Socorro...')
    sys.stdout.flush()
    socorro.SuperSearch(queries=queries).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    for sgn, trend in trends.items():
        signatures[sgn] = (signatures[sgn],
                           [trend[key] for key in sorted(trend.keys(), reverse=True)])

    _signatures = {}
    # order self.signatures by crash count
    sorted_signatures = sorted(signatures.items(), key=lambda x: x[1][0][0], reverse=True)
    i = 1
    for s in sorted_signatures:
        _signatures[s[0]] = i  # top crash rank
        i += 1

    sys.stdout.write('Getting bugs linked to the top signatures from Bugzilla...')
    sys.stdout.flush()
    while True:
        try:
            for r in res_bugs.results:
                r.result(timeout=2)
            break
        except TimeoutError:
            sys.stdout.write('.')
            sys.stdout.flush()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # TODO: In the first query to get the bugs, also get dupe_of and avoid the first query
    # in follow_dup (so modify follow_dup to accept both a bug ID or a bug object).
    queries = []
    for sgn in signatures.keys():
        duplicate_ids = [bug['id'] for bug in bugs[sgn]
                         if bug['resolution'] == 'DUPLICATE']

        # Remove bugs resolved as DUPLICATE from the list of bugs associated to the signature.
        bugs[sgn] = [bug for bug in bugs[sgn] if bug['id'] not in duplicate_ids]

        # Find duplicates for bugs resolved as DUPLICATE.
        duplicates = {k: v for k, v in Bugzilla.follow_dup(duplicate_ids).items()
                      if v is not None}
        duplicate_targets = [bug_id for bug_id in duplicates.values()
                             if int(bug_id) not in [bug['id'] for bug in bugs[sgn]]]
        if len(duplicate_targets) == 0:
            continue

        # Get info about bugs that the DUPLICATE bugs have been duped to.
        params = {
            'id': ','.join(duplicate_targets),
            'include_fields': bug_flags,
        }
        queries.append(Query(Bugzilla.API_URL, params, __bug_handler, bugs[sgn]))

    sys.stdout.write('Resolving duplicate bugs to the bugs they\'ve been duplicated to...')
    sys.stdout.flush()
    Bugzilla(queries=queries).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    for sgn, stats in signatures.items():
        # stats is a 2-uple: ([count, win_count, mac_count, linux_count, startup_count], trend)
        startup_percent = float(stats[0][4]) / float(stats[0][0])
        _signatures[sgn] = {'tc_rank': _signatures[sgn],
                            'crash_count': stats[0][0],
                            'estimated_user_count': stats[0][5],
                            'startup_percent': startup_percent,
                            'bugs': bugs[sgn]}

    return {
        'start_date': start_date,
        'end_date': end_date,
        'versions': list(versions),
        'signatures': _signatures,
    }
def get_last():
    lastdate = db.session.query(Lastdate.last)
    lastdate = utils.get_date_str(lastdate.first().last)

    return lastdate
def get_correct_date(date):
    date = get_date(date)
    if date:
        return utils.get_date_str(date)
    return utils.get_date('today')
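# Hypothetical usage (the behaviour of get_date is assumed from the callers above:
# it returns a parsed date, or a falsy value when the input cannot be parsed):
#
#   get_correct_date('2017-03-01')  # -> '2017-03-01'
#   get_correct_date('not a date')  # -> today's date, e.g. '2017-03-15'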