def get_buildids_from_socorro(search_buildid, search_date, product):
    """Get the builds from socorro for nightly channel. For other channels
    we use the database (fed with buildhub data)"""

    def collect(json, acc):
        # Ignore failed queries and empty facet results.
        if json["errors"] or not json["facets"]["build_id"]:
            return
        acc.extend(facet["term"] for facet in json["facets"]["build_id"])

    search_params = {
        "product": product,
        "release_channel": "nightly",
        "date": search_date,
        "build_id": search_buildid,
        "_facets": "build_id",
        "_results_number": 0,
        "_facets_size": 100,
    }
    build_ids = []
    socorro.SuperSearch(
        params=search_params, handler=collect, handlerdata=build_ids
    ).wait()

    # Return the build ids in ascending order.
    return sorted(build_ids)
def get_buildids_from_socorro(search_buildid, search_date, product):
    """Get the builds from socorro for nightly channel. For other channels
    we use the database (fed with buildhub data)"""

    def handler(json, data):
        # Ignore failed queries and empty facet results.
        if json['errors'] or not json['facets']['build_id']:
            return
        for facets in json['facets']['build_id']:
            bid = facets['term']
            data.append(bid)

    params = {
        'product': product,
        'release_channel': 'nightly',
        'date': search_date,
        'build_id': search_buildid,
        '_facets': 'build_id',
        '_results_number': 0,  # facets only, no individual crash reports
        '_facets_size': 100
    }
    data = []
    socorro.SuperSearch(params=params, handler=handler,
                        handlerdata=data).wait()

    # Return the build ids in ascending order.
    data = sorted(data)

    return data
def get(channel, versions=None, product='Firefox',
        start_date=None, end_date='today', duration=30, platforms=None):
    """Get ADI numbers and per-day crash-count placeholders for a channel.

    Returns a dict: date -> {'adi': n, 'browser': 0, 'content': 0, ...};
    the crash and rate fields are filled in by __super_search_handler.
    """
    if not isinstance(versions, list):
        if isinstance(versions, numbers.Number):
            # A major version number was given: resolve the active versions.
            versions = socorro.ProductVersions.get_active(vnumber=versions,
                                                          product=product)
        else:
            versions = socorro.ProductVersions.get_active(product=product)
        versions = versions[channel.lower()]

    if start_date:
        # An explicit start date overrides `duration`.
        _sdate = utils.get_date_ymd(start_date)
        _edate = utils.get_date_ymd(end_date)
        duration = (_edate - _sdate).days

    adi = socorro.ADI.get(version=versions, product=product,
                          end_date=end_date, duration=duration,
                          platforms=platforms)
    data = {}
    for d, n in adi.items():
        data[d] = {'adi': n, 'browser': 0, 'content': 0, 'plugin': 0,
                   'browser_rate': 0, 'content_rate': 0, 'b+c_rate': 0,
                   'plugin_rate': 0}

    start_date = utils.get_date(end_date, duration)
    search_date = socorro.SuperSearch.get_search_date(start_date, end_date)
    socorro.SuperSearch(params={'product': product,
                                'version': versions,
                                'release_channel': channel,
                                'date': search_date,
                                '_results_number': 0,
                                # 2 is for a facet on plugin and on content
                                '_facets_size': 2,
                                '_histogram.date': ['process_type']},
                        handler=__super_search_handler,
                        handlerdata=data).wait()

    return data
def get(channels, product='Firefox', date='today', query=None):
    """Get, per channel, the number of crashes for each day over ~6 months.

    Args:
        channels (list): release channels to query
        product (str): product name
        date (str): the end date ('today' by default)
        query (dict): extra SuperSearch parameters merged into the request

    Returns:
        dict: channel -> {date -> crash count}
    """
    today = utils.get_date_ymd(date)
    tomorrow = today + relativedelta(days=1)
    six_months_ago = today - relativedelta(weeks=25)
    search_date = socorro.SuperSearch.get_search_date(six_months_ago, tomorrow)
    data = {chan: {} for chan in channels}

    def handler(json, data):
        if not json['facets']['histogram_date']:
            return
        for facets in json['facets']['histogram_date']:
            date = utils.get_date_ymd(facets['term'])
            # One sub-facet bucket per channel for this day.
            for chan in facets['facets']['release_channel']:
                data[chan['term']][date] = chan['count']

    params = {
        'product': product,
        'date': search_date,
        'release_channel': channels,
        '_histogram.date': 'release_channel',
        '_results_number': 0
    }
    # `query=None` replaces a mutable default argument ({}), which is
    # shared across calls; behavior is unchanged for all callers.
    if query:
        params.update(query)

    socorro.SuperSearch(params=params, handler=handler,
                        handlerdata=data).wait()

    return data
def get_total(channels, product='Firefox', date='today'):
    """Get the total number of crashes per channel for a single day."""
    day = utils.get_date_ymd(date)
    next_day = day + relativedelta(days=1)
    search_date = socorro.SuperSearch.get_search_date(day, next_day)
    totals = {chan: 0 for chan in channels}

    def collect(json, acc):
        histogram = json['facets']['histogram_date']
        if not histogram:
            return
        for day_facet in histogram:
            for chan_facet in day_facet['facets']['release_channel']:
                acc[chan_facet['term']] = chan_facet['count']

    socorro.SuperSearch(params={
        'product': product,
        'date': search_date,
        'release_channel': channels,
        '_histogram.date': 'release_channel',
        '_results_number': 0
    }, handler=collect, handlerdata=totals).wait()

    return totals
def analyze_gfx_critical_errors(signature='', product='Firefox',
                                channel=None, versions=None, start_date=''):
    """Count the crashes containing each known graphics critical error.

    Args:
        signature (str): restrict the counts to this signature (optional)
        product (str): product name
        channel (list): release channels; None/[]/['all'] means all of them
        versions (list): versions to query; fetched automatically if empty
        start_date (str): lower-bound date; defaults to a week ago

    Returns:
        dict: gfx critical error -> crash count
    """
    # None defaults replace mutable list defaults (['all'], []) which are
    # shared across calls (ruff B006); behavior is unchanged.
    if channel is None:
        channel = ['all']
    if versions is None:
        versions = []

    if product.lower() == 'firefox':
        product = 'Firefox'

    if channel == [] or channel[0].lower() == 'all':
        channel = ['release', 'beta', 'nightly']
        if product == 'Firefox':
            channel.append('esr')
    else:
        channel = [c.lower() for c in channel]

    if not versions:
        base_versions = libmozdata.versions.get(base=True)
        versions_by_channel = socorro.ProductVersions.get_info_from_major(
            base_versions, product=product)
        versions = []
        for v1 in versions_by_channel.values():
            for v2 in v1:
                versions.append(v2['version'])

    if not start_date:
        start_date = utils.get_date('today', 7)

    gfx_critical_errors = get_critical_errors()

    count = {}

    def handler(json, gfx_critical_error):
        # The handler data is the error string itself; record its total.
        count[gfx_critical_error] = json['total']

    base_params = {
        'product': product,
        'release_channel': channel,
        'version': versions,
        'date': '>=' + start_date,
        '_results_number': 0,
        '_facets_size': 0,
    }
    if signature:
        base_params['signature'] = signature

    queries = []
    for gfx_critical_error in gfx_critical_errors:
        params = base_params.copy()
        # '~' is the SuperSearch "contains" operator.
        params['graphics_critical_error'] = '~' + gfx_critical_error
        queries.append(
            Query(socorro.SuperSearch.URL,
                  params=params,
                  handler=handler,
                  handlerdata=gfx_critical_error))
    socorro.SuperSearch(queries=queries).wait()

    return count
def get_proto_small(product, signatures, search_date, channel):
    """Get the proto-signatures for signature with a small number of crashes.
    Since we 'must' aggregate uuid on proto-signatures, to be faster
    we query several signatures: it's possible because we know that
    card(proto) <= card(crashes) for a given signature."""
    logger.info('Get proto-signatures (small) for {}-{}: started.'.format(
        product, channel))

    def handler(bid, threshold, json, data):
        if not json['facets']['proto_signature']:
            return
        for facets in json['facets']['proto_signature']:
            _facets = facets['facets']
            sgn = _facets['signature'][0]['term']
            protos = data[sgn]['protos'][bid]
            # Keep at most `threshold` proto-signatures per build id.
            if len(protos) < threshold:
                proto = facets['term']
                count = facets['count']
                uuid = _facets['uuid'][0]['term']
                protos.append({'proto': proto,
                               'count': count,
                               'uuid': uuid})
        for facets in json['facets']['signature']:
            sgn = facets['term']
            count = facets['facets']['cardinality_install_time']['value']
            # Floor the install count at 1.
            data[sgn]['installs'][bid] = 1 if count == 0 else count

    limit = config.get_limit_facets()
    threshold = config.get_threshold('protos', product, channel)
    base_params = {
        'product': product,
        'release_channel': utils.get_search_channel(channel),
        'date': search_date,
        'build_id': '',
        'signature': '',
        '_aggs.proto_signature': ['uuid', 'signature'],
        '_aggs.signature': '_cardinality.install_time',
        '_results_number': 0,
        '_facets': 'release_channel',
        '_facets_size': limit
    }

    sgns_by_bids = utils.get_sgns_by_bids(signatures)

    for bid, all_signatures in sgns_by_bids.items():
        params = copy.deepcopy(base_params)
        params['build_id'] = utils.get_buildid(bid)
        queries = []
        hdler = functools.partial(handler, bid, threshold)

        # Query signatures in chunks of 10 to limit the number of requests.
        for sgns in Connection.chunks(all_signatures, 10):
            params = copy.deepcopy(params)
            params['signature'] = ['=' + s for s in sgns]
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=params,
                      handler=hdler,
                      handlerdata=signatures))
        socorro.SuperSearch(queries=queries).wait()

    logger.info('Get proto-signatures (small) for {}-{}: finished.'.format(
        product, channel))
def filter_nightly_buildids(buildids):
    """Filter out nightly build ids having too few crashes.

    Mutates `buildids` in place. The most recent builds below the threshold
    are kept anyway (presumably because their crash reports may not have
    arrived yet -- see the trailing loop; TODO confirm).
    """

    def handler(threshold, json, data):
        if not json['facets']['build_id']:
            return
        for facets in json['facets']['build_id']:
            count = facets['count']
            # Mark the build as having enough crashes.
            if count >= threshold:
                bid = utils.get_build_date(facets['term'])
                data[bid] = True

    params = {
        'product': '',
        'build_id': '',
        'date': '',
        'release_channel': 'nightly',
        '_aggs.build_id': 'release_channel',
        '_results_number': 0,
        '_facets': 'release_channel',
        '_facets_size': 1000
    }

    data = {'Firefox': None, 'FennecAndroid': None}
    queries = []

    for prod in data.keys():
        pparams = copy.deepcopy(params)
        pparams['product'] = prod
        threshold = config.get_min_total(prod, 'nightly')
        data[prod] = D = {}
        for bids in Connection.chunks(buildids[prod]['nightly'],
                                      chunk_size=128):
            pparams = copy.deepcopy(pparams)
            pparams['date'] = '>=' + bids[0][0].strftime('%Y-%m-%d')
            pparams['build_id'] = L = []
            for b in bids:
                L.append(utils.get_buildid(b[0]))
                # Default: not enough crashes until proven otherwise.
                D[b[0]] = False
            hdler = functools.partial(handler, threshold)
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=pparams,
                      handler=hdler,
                      handlerdata=D))

    socorro.SuperSearch(queries=queries).wait()

    for prod, info in data.items():
        bids = buildids[prod]['nightly']
        L = [(bid, info[bid[0]]) for bid in bids]
        # Force-keep the trailing run of under-threshold (recent) builds.
        for i in range(len(L) - 1, -1, -1):
            if not L[i][1]:
                L[i] = (L[i][0], True)
            else:
                break
        buildids[prod]['nightly'] = [x[0] for x in L if x[1]]
def get_sgns_for_doubloons(doubloons, signatures, search_date, base_data):
    """Get crash stats for build ids that belong to several
    product/channel/version triples ("doubloons"); each triple is queried
    separately to disambiguate the numbers.

    Mutates `base_data` in place; returns None.
    """
    if not doubloons:
        return None

    limit = 50
    nbase = [0, 0]  # [raw crash count, install count] template

    def handler(bid, json, data):
        if not json['facets']['signature']:
            return
        for facets in json['facets']['signature']:
            sgn = facets['term']
            _facets = facets['facets']
            nums = data[sgn]
            if isinstance(nums, list):
                # Lazily replace the shared build-id list with a
                # per-build stats dict.
                data[sgn] = nums = {b: copy.copy(nbase) for b in data[sgn]}
            if bid in nums:
                n = nums[bid]
                n[RAW] = facets['count']
                N = len(_facets['install_time'])
                if N == limit:
                    # Facet truncated: fall back to the cardinality value.
                    N = _facets['cardinality_install_time']['value']
                n[INSTALLS] = N

    base_params = {
        'build_id': '',
        'product': '',
        'release_channel': '',
        'signature': ['=' + s for s in signatures],
        'date': search_date,
        '_aggs.signature': ['install_time', '_cardinality.install_time'],
        '_results_number': 0,
        '_facets': 'product',
        '_facets_size': limit
    }

    queries = []
    for bid, pcvs in doubloons.items():
        bparams = copy.deepcopy(base_params)
        bparams['build_id'] = bid
        bid = utils.get_build_date(bid)
        for pcv in pcvs:
            params = copy.deepcopy(bparams)
            prod, chan, ver = pcv
            params['product'] = prod
            params['release_channel'] = chan
            params['version'] = ver
            hdler = functools.partial(handler, bid)
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=params,
                      handler=hdler,
                      handlerdata=base_data[prod][chan]))
    socorro.SuperSearch(queries=queries).wait()
def get_signatures(channels, product='Firefox', date='today', query=None,
                   ndays=7, N=50):
    """Get the top-N signatures and their daily crash counts per channel.

    Args:
        channels (list): release channels to query
        product (str): product name
        date (str): the end date
        query (dict): extra SuperSearch parameters merged into the request
        ndays (int): number of days to look back
        N (int): number of top signatures to keep

    Returns:
        the result of get_top_signatures on the collected data
    """
    today = utils.get_date_ymd(date)
    tomorrow = today + relativedelta(days=1)
    few_days_ago = today - relativedelta(days=ndays)
    search_date = socorro.SuperSearch.get_search_date(few_days_ago, tomorrow)
    # One zeroed bucket per day in the window.
    base = {few_days_ago + relativedelta(days=i): 0 for i in range(ndays + 1)}
    data = {chan: defaultdict(lambda: copy.copy(base)) for chan in channels}

    def handler(skip_pats, json, data):
        if json['errors'] or not json['facets']['histogram_date']:
            return
        for facets in json['facets']['histogram_date']:
            date = utils.get_date_ymd(facets['term'])
            signatures = facets['facets']['signature']
            for signature in signatures:
                # Ignore signatures matching the channel skiplist.
                if any(p.match(signature) for p in skip_pats):
                    continue
                total = signature['count']
                sgn = signature['term']
                data[sgn][date] += total

    params = {
        'product': product,
        'date': search_date,
        'release_channel': '',
        '_histogram.date': 'signature',
        '_results_number': 0,
        '_facets_size': 10000
    }
    # `query=None` replaces a mutable default argument ({}), which is
    # shared across calls; behavior is unchanged for all callers.
    if query:
        params.update(query)

    searches = []
    for chan in channels:
        skip_pats = config.get_skiplist_channel(chan)
        params = copy.deepcopy(params)
        params['release_channel'] = chan
        hdler = functools.partial(handler, skip_pats)
        searches.append(
            socorro.SuperSearch(params=params, handler=hdler,
                                handlerdata=data[chan]))

    # The searches run concurrently; wait for each one to complete.
    for s in searches:
        s.wait()

    for chan in channels:
        gather(data[chan])

    return get_top_signatures(data, product, N=N)
def test_search(self):
    data = {}
    params = {
        'product': 'Firefox',
        'signature': '~OOM',
        '_columns': ['uuid', 'build_id'],
        '_results_number': 0,
        '_facets': ['product'],
    }
    search = socorro.SuperSearch(params=params,
                                 handler=lambda j, d: d.update(j),
                                 handlerdata=data)
    search.wait()
    self.assertIsNotNone(data)
def get_fenix_buildids(channels):
    """Get Fenix build ids (with their version) per channel from crash-stats.

    Returns a dict: channel -> sorted list of [buildid, version].
    """
    # We don't have build info for Fenix on buildhub
    # so we get them from crash-stats
    date = lmdutils.get_date_ymd('today') - relativedelta(months=3)
    min_bid = date.strftime('%Y%m%d000000')
    date = date.strftime('%Y-%m-%d')
    params = {
        'product': ['Fenix'],
        'date': '>=' + date,
        'build_id': '>=' + min_bid,
        '_aggs.build_id': 'version',
        '_results_number': 0,
        '_facets': 'release_channel',
        '_facets_size': 1000,
    }
    data = {}

    def handler(chan, json, data):
        if not json['facets']['build_id']:
            return
        info = {}
        for facets in json['facets']['build_id']:
            bid = facets['term']
            versions = facets['facets']['version']
            # Take the version with the most crashes for this build id.
            relevant = max(versions, key=lambda x: x['count'])
            # Workaround to remove "noise" in release channel.
            # Anyway all this stuff will be removed once we have
            # build data on buildhub.
            if chan != 'release' or relevant['count'] >= 200:
                version = relevant['term']
                info[str(bid)] = version
        info = sorted(info.items())
        data[chan] = [list(x) for x in info]

    queries = []
    for chan in channels:
        params = params.copy()
        params['release_channel'] = chan
        hdler = functools.partial(handler, chan)
        queries.append(
            Query(socorro.SuperSearch.URL,
                  params=params,
                  handler=hdler,
                  handlerdata=data))
    ss = socorro.SuperSearch(queries=queries)
    ss.wait()
    # Explicitly close the underlying HTTP session.
    ss.session.close()

    return data
def test_bugs(self):
    signature = []

    def collect(j, d):
        facets = j['facets']['signature']
        d.append(facets[0]['term'])
        d.append(facets[1]['term'])

    socorro.SuperSearch(params={'product': 'Firefox',
                                'signature': '~OOM',
                                '_results_number': 0,
                                '_facets': ['signature']},
                        handler=collect,
                        handlerdata=signature).wait()
    self.assertEqual(len(signature), 2)
    bugs = socorro.Bugs.get_bugs(signature)
    self.assertIsNotNone(bugs)
def get_proto_big(product, signatures, search_date, channel):
    """Get proto-signatures for signatures which have a high # of crashes
    (>=500)"""
    logger.info("Get proto-signatures (big) for {}-{}: started.".format(
        product, channel))

    def handler(bid, threshold, json, data):
        if not json["facets"]["proto_signature"]:
            return
        installs = json["facets"]["cardinality_install_time"]["value"]
        # Floor the install count at 1.
        data["installs"][bid] = 1 if installs == 0 else installs
        for facets in json["facets"]["proto_signature"]:
            protos = data["protos"][bid]
            # Keep at most `threshold` proto-signatures per build id.
            if len(protos) < threshold:
                proto = facets["term"]
                count = facets["count"]
                uuid = facets["facets"]["uuid"][0]["term"]
                protos.append({"proto": proto, "count": count, "uuid": uuid})

    threshold = config.get_threshold("protos", product, channel)
    base_params = {
        "product": product,
        "release_channel": utils.get_search_channel(channel),
        "date": search_date,
        "build_id": "",
        "signature": "",
        "_aggs.proto_signature": "uuid",
        "_results_number": 0,
        "_facets": "_cardinality.install_time",
        "_facets_size": threshold,
    }

    sgns_by_bids = utils.get_sgns_by_bids(signatures)
    for bid, all_signatures in sgns_by_bids.items():
        params = copy.deepcopy(base_params)
        params["build_id"] = utils.get_buildid(bid)
        queries = []
        hdler = functools.partial(handler, bid, threshold)

        # One query per signature: "big" signatures are queried individually.
        for sgn in all_signatures:
            params = copy.deepcopy(params)
            params["signature"] = "=" + sgn
            queries.append(
                Query(
                    socorro.SuperSearch.URL,
                    params=params,
                    handler=hdler,
                    handlerdata=signatures[sgn],
                ))
        socorro.SuperSearch(queries=queries).wait()

    logger.info("Get proto-signatures (big) for {}-{}: finished.".format(
        product, channel))
def test_processed(self):
    uuid = []

    def collect(j, d):
        d.append(j['hits'][0]['uuid'])
        d.append(j['hits'][1]['uuid'])

    socorro.SuperSearch(params={'product': 'Firefox',
                                'signature': '~OOM',
                                '_columns': ['uuid'],
                                '_results_number': 2,
                                '_facets': ['product']},
                        handler=collect,
                        handlerdata=uuid).wait()
    self.assertEqual(len(uuid), 2)
    processed = socorro.ProcessedCrash.get_processed(uuid)
    self.assertIsNotNone(processed)
def filter_nightly_buildids_helper(fa_bids, fx_bids):
    """Filter the nightly builds to keep only the relevant ones. """
    crashy = {'Firefox': set(), 'FennecAndroid': set()}
    query = get_filter_query(fa_bids, fx_bids)
    search = socorro.SuperSearch(params=query,
                                 handler=filter_by_crashes_num,
                                 handlerdata=crashy)
    search.wait()

    return (get_useful_bids(fa_bids, crashy['FennecAndroid']),
            get_useful_bids(fx_bids, crashy['Firefox']))
def get(product, channel, date='yesterday'):
    """Get, per day and per report category, throttle-adjusted crash counts.

    Args:
        product (str): product name
        channel (str): release channel
        date (str): the day to query ('yesterday' by default)

    Returns:
        dict: date -> {category -> count or {process type -> count}}
    """
    yesterday = utils.get_date_ymd(date)
    versions, throttle = magutils.get_versions(yesterday, product, channel)

    def handler(rep, catname, json, data):
        if json['errors'] or not json['facets']['histogram_date']:
            return {}
        for facets in json['facets']['histogram_date']:
            total = facets['count']
            dt = facets['term']
            if rep['process_split']:
                # Removed a dead assignment here: `pt` was set to the
                # process_type facet list and immediately shadowed by the
                # loop variable below.
                nonbrowser = 0
                d = defaultdict(int)
                for pt in facets['facets']['process_type']:
                    ty = pt['term']
                    N = pt['count']
                    d[ty] += int(N * throttle)
                    nonbrowser += N
                # Whatever is not a child process is the browser process.
                d['browser'] += int((total - nonbrowser) * throttle)
                data[dt][catname] = dict(d)
            else:
                data[dt][catname] = int(total * throttle)

    queries = []
    data = defaultdict(dict)
    today = yesterday + timedelta(days=1)
    search_date = socorro.SuperSearch.get_search_date(yesterday, today)

    for catname, rep in reports.items():
        # Skip desktop-only reports for non-Firefox products.
        if rep['desktoponly'] and product != 'Firefox':
            continue
        params = {
            'product': product,
            'version': versions,
            'date': search_date,
            'release_channel': channel,
            '_histogram.date': 'process_type',
            '_facets_size': 5,
            '_results_number': 0
        }
        params.update(rep['params'])
        queries.append(
            Query(socorro.SuperSearch.URL, params,
                  handler=functools.partial(handler, rep, catname),
                  handlerdata=data))
    socorro.SuperSearch(queries=queries).wait()

    return dict(data)
def get_proto_big(product, signatures, search_date, channel):
    """Get proto-signatures for signatures which have a high # of crashes
    (>=500)"""
    logger.info('Get proto-signatures (big) for {}-{}: started.'.format(
        product, channel))

    def handler(bid, threshold, json, data):
        if not json['facets']['proto_signature']:
            return
        installs = json['facets']['cardinality_install_time']['value']
        # Floor the install count at 1.
        data['installs'][bid] = 1 if installs == 0 else installs
        for facets in json['facets']['proto_signature']:
            protos = data['protos'][bid]
            # Keep at most `threshold` proto-signatures per build id.
            if len(protos) < threshold:
                proto = facets['term']
                count = facets['count']
                uuid = facets['facets']['uuid'][0]['term']
                protos.append({'proto': proto,
                               'count': count,
                               'uuid': uuid})

    threshold = config.get_threshold('protos', product, channel)
    base_params = {
        'product': product,
        'release_channel': utils.get_search_channel(channel),
        'date': search_date,
        'build_id': '',
        'signature': '',
        '_aggs.proto_signature': 'uuid',
        '_results_number': 0,
        '_facets': '_cardinality.install_time',
        '_facets_size': threshold
    }

    sgns_by_bids = utils.get_sgns_by_bids(signatures)
    for bid, all_signatures in sgns_by_bids.items():
        params = copy.deepcopy(base_params)
        params['build_id'] = utils.get_buildid(bid)
        queries = []
        hdler = functools.partial(handler, bid, threshold)

        # One query per signature: "big" signatures are queried individually.
        for sgn in all_signatures:
            params = copy.deepcopy(params)
            params['signature'] = '=' + sgn
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=params,
                      handler=hdler,
                      handlerdata=signatures[sgn]))
        socorro.SuperSearch(queries=queries).wait()

    logger.info('Get proto-signatures (big) for {}-{}: finished.'.format(
        product, channel))
def get_sgns_data_helper(data, platforms, signatures, bids_chan, extra,
                         search_date, product, channel=None):
    """Get the data from Socorro and collect the stats. """
    facets_limit = 80
    common = {
        'build_id': [utils.get_buildid(bid) for bid in bids_chan.keys()],
        'date': search_date,
        'product': product,
        '_aggs.build_id': [
            'install_time',
            '_cardinality.install_time',
            'startup_crash',
            'platform_pretty_version',
        ],
        '_results_number': 0,
        '_facets': 'signature',
        '_facets_size': facets_limit,
    }
    if channel:
        # The 'beta' channel also covers data reported under 'aurora'.
        if channel == 'beta':
            common['release_channel'] = ['beta', 'aurora']
        else:
            common['release_channel'] = channel
    utils.update_params(common, extra)

    queries = []
    for signature in signatures:
        sgn_params = dict(common)
        sgn_params['signature'] = '=' + signature
        collect = functools.partial(filter_signatures_data, facets_limit,
                                    product, platforms, signature, bids_chan)
        queries.append(
            Query(socorro.SuperSearch.URL,
                  params=sgn_params,
                  handler=collect,
                  handlerdata=data))

    return socorro.SuperSearch(queries=queries)
def get_uuids_fennec(signatures, search_date, channel):
    """Get the uuids for Fennec java crashes"""
    logger.info("Get uuids for Fennec-{}: started.".format(channel))

    def handler(json, data):
        if json["errors"] or not json["facets"]["signature"]:
            return
        # Each query targets a single build id, so the facet has one entry.
        bid = json["facets"]["build_id"][0]["term"]
        bid = utils.get_build_date(bid)
        for facets in json["facets"]["signature"]:
            sgn = facets["term"]
            count = facets["count"]
            facets = facets["facets"]
            uuid = facets["uuid"][0]["term"]
            protos = data[sgn]["protos"][bid]
            # Only record one (empty-proto) entry per signature/build.
            if not protos:
                protos.append({"proto": "", "count": count, "uuid": uuid})

    base_params = {
        "product": "Fennec",
        "release_channel": utils.get_search_channel(channel),
        "date": search_date,
        "build_id": "",
        "signature": "",
        "_aggs.signature": "uuid",
        "_results_number": 0,
        "_facets": "build_id",
        "_facets_size": 100,
    }

    queries = []
    sgns_by_bids = utils.get_sgns_by_bids(signatures)
    for bid, all_signatures in sgns_by_bids.items():
        params = copy.deepcopy(base_params)
        params["build_id"] = utils.get_buildid(bid)
        # Query signatures in chunks of 10 to limit the number of requests.
        for sgns in Connection.chunks(all_signatures, 10):
            params = copy.deepcopy(params)
            params["signature"] = ["=" + s for s in sgns]
            queries.append(
                Query(
                    socorro.SuperSearch.URL,
                    params=params,
                    handler=handler,
                    handlerdata=signatures,
                ))
    socorro.SuperSearch(queries=queries).wait()

    logger.info("Get uuids for Fennec-{}: finished.".format(channel))
def get_uuids_fennec(signatures, search_date, channel):
    """Get the uuids for FennecAndroid java crashes"""
    logger.info('Get uuids for FennecAndroid-{}: started.'.format(channel))

    def handler(json, data):
        if json['errors'] or not json['facets']['signature']:
            return
        # Each query targets a single build id, so the facet has one entry.
        bid = json['facets']['build_id'][0]['term']
        bid = utils.get_build_date(bid)
        for facets in json['facets']['signature']:
            sgn = facets['term']
            count = facets['count']
            facets = facets['facets']
            uuid = facets['uuid'][0]['term']
            protos = data[sgn]['protos'][bid]
            # Only record one (empty-proto) entry per signature/build.
            if not protos:
                protos.append({'proto': '',
                               'count': count,
                               'uuid': uuid})

    base_params = {
        'product': 'FennecAndroid',
        'release_channel': utils.get_search_channel(channel),
        'date': search_date,
        'build_id': '',
        'signature': '',
        '_aggs.signature': 'uuid',
        '_results_number': 0,
        '_facets': 'build_id',
        '_facets_size': 100
    }

    queries = []
    sgns_by_bids = utils.get_sgns_by_bids(signatures)
    for bid, all_signatures in sgns_by_bids.items():
        params = copy.deepcopy(base_params)
        params['build_id'] = utils.get_buildid(bid)
        # Query signatures in chunks of 10 to limit the number of requests.
        for sgns in Connection.chunks(all_signatures, 10):
            params = copy.deepcopy(params)
            params['signature'] = ['=' + s for s in sgns]
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=params,
                      handler=handler,
                      handlerdata=signatures))
    socorro.SuperSearch(queries=queries).wait()

    logger.info('Get uuids for FennecAndroid-{}: finished.'.format(channel))
def test_signature(self):
    signature = []

    def collect(j, d):
        facets = j['facets']['signature']
        d.extend([facets[0]['term'], facets[1]['term']])

    socorro.SuperSearch(params={'product': 'Firefox',
                                'signature': '~OOM',
                                '_results_number': 0,
                                '_facets': ['signature']},
                        handler=collect,
                        handlerdata=signature).wait()
    self.assertEqual(len(signature), 2)
    trend = socorro.SignatureTrend.get_trend(signature, channel='nightly')
    self.assertIsNotNone(trend)
def filter_buildids_helper(fa_bids, fx_bids, channel):
    """Filter the builds to keep only the relevant ones. """
    crashy = {'Firefox': set(), 'FennecAndroid': set()}
    search = socorro.SuperSearch(
        params=get_filter_query(fa_bids, fx_bids, channel),
        handler=functools.partial(filter_by_crashes_num, channel),
        handlerdata=crashy,
    )
    search.wait()
    search.session.close()

    return (get_useful_bids(fa_bids, crashy['FennecAndroid']),
            get_useful_bids(fx_bids, crashy['Firefox']))
def get_changeset(buildid, channel, product):
    """Trick to get changeset for a particular buildid/channel/product"""
    search_date = '>=' + lmdutils.get_date_str(buildid)
    buildid = utils.get_buildid(buildid)
    logger.info('Get changeset for {}-{}-{}.'.format(buildid, product,
                                                     channel))

    def handler(json, data):
        # Filenames look like 'hg:hg.mozilla.org/...:<rev>'; capture the rev.
        pat = re.compile(r'^.*:([0-9a-f]+)$')
        if not json['facets']['build_id']:
            return
        for facets in json['facets']['build_id']:
            for tf in facets['facets']['topmost_filenames']:
                m = pat.match(tf['term'])
                if m:
                    chgset = m.group(1)
                    count = tf['count']
                    data[chgset] += count

    params = {
        'product': product,
        'release_channel': channel,
        'build_id': buildid,
        'date': search_date,
        'topmost_filenames': '@\"hg:hg.mozilla.org/\".*:[0-9a-f]+',
        '_aggs.build_id': 'topmost_filenames',
        '_results_number': 0,
        '_facets': 'product',
        '_facets_size': 100
    }

    data = defaultdict(lambda: 0)
    socorro.SuperSearch(params=params, handler=handler,
                        handlerdata=data).wait()

    chgset = None
    if data:
        # Keep the changeset with the highest report count.
        chgset, _ = max(data.items(), key=lambda p: p[1])
        chgset = utils.short_rev(chgset)

    logger.info('Get changeset: finished.')

    return chgset
def get_changeset(buildid, channel, product):
    """Trick to get changeset for a particular buildid/channel/product"""
    search_date = ">=" + lmdutils.get_date_str(buildid)
    buildid = utils.get_buildid(buildid)
    logger.info("Get changeset for {}-{}-{}.".format(buildid, product, channel))

    # Map changeset hash -> number of crash reports referencing it.
    counts = defaultdict(lambda: 0)

    def collect(json, data):
        rev_pat = re.compile(r"^.*:([0-9a-f]+)$")
        if not json["facets"]["build_id"]:
            return
        for bid_facet in json["facets"]["build_id"]:
            for tf in bid_facet["facets"]["topmost_filenames"]:
                matched = rev_pat.match(tf["term"])
                if matched:
                    data[matched.group(1)] += tf["count"]

    socorro.SuperSearch(
        params={
            "product": product,
            "release_channel": channel,
            "build_id": buildid,
            "date": search_date,
            "topmost_filenames": '@"hg:hg.mozilla.org/".*:[0-9a-f]+',
            "_aggs.build_id": "topmost_filenames",
            "_results_number": 0,
            "_facets": "product",
            "_facets_size": 100,
        },
        handler=collect,
        handlerdata=counts,
    ).wait()

    chgset = None
    if counts:
        # Keep the changeset with the highest report count.
        chgset, _ = max(counts.items(), key=lambda p: p[1])
        chgset = utils.short_rev(chgset)

    logger.info("Get changeset: finished.")

    return chgset
def get_by_install_time(channels, product='Firefox', date='today', query=None):
    """Get the number of distinct installs per day for each channel.

    Args:
        channels (list): release channels to query
        product (str): product name
        date (str): the end date
        query (dict): extra SuperSearch parameters merged into the request

    Returns:
        dict: channel -> {date -> number of distinct installations}
    """
    today = utils.get_date_ymd(date)
    tomorrow = today + relativedelta(days=1)
    six_months_ago = today - relativedelta(weeks=25)
    search_date = socorro.SuperSearch.get_search_date(six_months_ago, tomorrow)
    data = {chan: {} for chan in channels}

    def handler(json, data):
        if not json['facets']['histogram_date']:
            return
        for facets in json['facets']['histogram_date']:
            date = utils.get_date_ymd(facets['term'])
            ninstalls = facets['facets']['cardinality_install_time']['value']
            data[date] = ninstalls

    params = {
        'product': product,
        'date': search_date,
        'release_channel': '',
        '_histogram.date': '_cardinality.install_time',
        '_results_number': 10
    }
    # `query=None` replaces a mutable default argument ({}), which is
    # shared across calls; behavior is unchanged for all callers.
    if query:
        params.update(query)

    searches = []
    for chan in channels:
        params = copy.deepcopy(params)
        params['release_channel'] = chan
        searches.append(
            socorro.SuperSearch(params=params, handler=handler,
                                handlerdata=data[chan]))

    # The searches run concurrently; wait for each one to complete.
    for s in searches:
        s.wait()

    return data
def get_buildids(search_date, channels, products):
    """Get, per product/channel, the most recent (build date, version)
    couples for builds having enough crashes."""
    data = {p: {c: list() for c in channels} for p in products}

    def handler(chan, threshold, json, data):
        if not json['facets']['build_id']:
            return
        for facets in json['facets']['build_id']:
            count = facets['count']
            if count >= threshold:
                version = facets['facets']['version'][0]['term']
                # On beta, skip 'a2' and 'b0' pseudo-versions.
                if chan != 'beta' or not (version.endswith('a2')
                                          or version.endswith('b0')):
                    buildid = facets['term']
                    data.append((buildid, version, count))

    params = {
        'product': '',
        'release_channel': '',
        'date': search_date,
        '_aggs.build_id': 'version',
        '_results_number': 0,
        '_facets': 'release_channel',
        '_facets_size': 1000
    }

    queries = []
    for prod in products:
        pparams = copy.deepcopy(params)
        pparams['product'] = prod
        for chan in channels:
            params = copy.deepcopy(pparams)
            if chan == 'beta' and prod == 'Firefox':
                # Firefox beta reports can also come in under 'aurora'.
                params['release_channel'] = ['beta', 'aurora']
            else:
                params['release_channel'] = chan
            threshold = config.get_min_total(prod, chan)
            hdler = functools.partial(handler, chan, threshold)
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=params,
                      handler=hdler,
                      handlerdata=data[prod][chan]))
    socorro.SuperSearch(queries=queries).wait()

    for prod, info in data.items():
        data[prod] = remove_dup_versions(info)

    res = {}
    for prod, info in data.items():
        res[prod] = d = {}
        for chan, bids in info.items():
            bids = sorted(bids)
            min_v = config.get_versions(prod, chan)
            # Keep only the most recent builds.
            if len(bids) > min_v:
                bids = bids[-min_v:]
            bids = [(utils.get_build_date(bid), v) for bid, v in bids]
            d[chan] = bids

    logger.info('Buildids for {}/{} got.'.format(products, channels))

    return res
def get_sgns_data(channels, versions, signatures, products, date='today'):
    """Get, for each product/channel/signature, the crash numbers by build id.

    Build ids shared by several product/channel/version triples are
    disambiguated afterwards by get_sgns_for_doubloons.
    """
    today = lmdutils.get_date_ymd(date)
    few_days_ago = today - relativedelta(days=config.get_limit())
    search_date = socorro.SuperSearch.get_search_date(few_days_ago)
    nbase = [0, 0]  # [raw crash count, install count] template
    data = {}
    bids, all_bids, all_versions, doubloons = get_all_buildids(versions)

    for product in products:
        data[product] = d1 = {}
        b1 = bids[product]
        for chan in channels:
            d1[chan] = d2 = {}
            b2 = b1[chan]
            for signature in signatures:
                # Shared reference to the build-id list; replaced lazily by a
                # per-build stats dict in the handler.
                d2[signature] = b2

    limit = 80

    def handler(sgn, json, data):
        if not json['facets']['build_id']:
            return
        for facets in json['facets']['build_id']:
            bid = facets['term']
            prod, chan, ver = all_bids[str(bid)]
            _facets = facets['facets']
            chans = set()
            for c in _facets['release_channel']:
                c = c['term']
                # 'aurora' data counts as 'beta'.
                if c == 'aurora':
                    chans.add('beta')
                else:
                    chans.add(c)
            if len(_facets['product']) != 1 or len(chans) != 1:
                # Ambiguous build id: postpone it to the doubloons pass.
                bid = str(bid)
                doubloons[bid] = [(prod, chan, ver)]
            else:
                dpc = data[prod][chan]
                nums = dpc[sgn]
                bid = utils.get_build_date(bid)
                if isinstance(nums, list):
                    # Replace the shared build-id list with a per-build dict.
                    dpc[sgn] = nums = {b: copy.copy(nbase) for b in dpc[sgn]}
                if bid in nums:
                    n = nums[bid]
                    n[RAW] = facets['count']
                    N = len(_facets['install_time'])
                    if N == limit:
                        # Facet truncated: fall back to the cardinality value.
                        N = _facets['cardinality_install_time']['value']
                    n[INSTALLS] = N

    base_params = {
        'build_id': list(all_bids.keys()),
        'signature': '',
        'version': all_versions,
        'date': search_date,
        '_aggs.build_id': [
            'install_time',
            '_cardinality.install_time',
            'release_channel',
            'product'
        ],
        '_results_number': 0,
        '_facets': 'signature',
        '_facets_size': limit
    }

    queries = []
    for signature in signatures:
        params = copy.deepcopy(base_params)
        params['signature'] = '=' + signature
        hdler = functools.partial(handler, signature)
        queries.append(
            Query(socorro.SuperSearch.URL,
                  params=params,
                  handler=hdler,
                  handlerdata=data))
    socorro.SuperSearch(queries=queries).wait()

    get_sgns_for_doubloons(doubloons, signatures, search_date, data)

    res = defaultdict(lambda: defaultdict(lambda: dict()))
    for p, i in data.items():
        for c, j in i.items():
            for sgn, numbers in j.items():
                # Entries still mapped to a plain list got no crash data.
                if not isinstance(numbers, list):
                    res[p][c][sgn] = numbers

    return res
def get_sgns_by_buildid(signatures, channels, products, search_date, bids):
    """Get the numbers of crashes and installs by signature and build id.

    Returns (res, ratios) where res maps product -> channel -> signature
    stats and ratios holds the global ratios computed by tools.
    """
    base = utils.get_base_list(bids)
    limit = config.get_limit_facets()
    logger.info('Get crash numbers for {}-{}: started.'.format(
        products, channels))

    def handler(base, index, json, data):
        if not json['facets']['signature']:
            return
        for facets in json['facets']['signature']:
            sgn = facets['term']
            if sgn not in data:
                data[sgn] = copy.deepcopy(base)
            data[sgn][index][RAW] = facets['count']
            facets = facets['facets']
            n = len(facets['install_time'])
            if n == limit:
                # Facet truncated: fall back to the cardinality value.
                n = facets['cardinality_install_time']['value']
            data[sgn][index][INSTALLS] = n

    base_params = {
        'product': '',
        'release_channel': '',
        'build_id': '',
        'date': search_date,
        '_aggs.signature': ['install_time', '_cardinality.install_time'],
        '_results_number': 0,
        '_facets': 'release_channel',
        '_facets_size': limit
    }

    ratios = {}
    res = {}
    for prod in products:
        pparams = copy.deepcopy(base_params)
        pparams['product'] = prod
        base_prod = base[prod]
        bids_prod = bids[prod]
        ratios[prod] = ratios_prod = {}
        res[prod] = res_prod = {}
        for chan in channels:
            params = copy.deepcopy(pparams)
            params['release_channel'] = chan
            data = {}
            sbids = [b for b, _ in bids_prod[chan]]
            queries = []
            # One query per build id; `index` is the position of this build
            # in the per-signature stats list.
            for index, bid in enumerate(sbids):
                params = copy.deepcopy(params)
                params['build_id'] = utils.get_buildid(bid)
                hdler = functools.partial(handler, base_prod[chan], index)
                queries.append(
                    Query(socorro.SuperSearch.URL,
                          params=params,
                          handler=hdler,
                          handlerdata=data))
            socorro.SuperSearch(queries=queries).wait()

            ratios_prod[chan] = tools.get_global_ratios(data)

            # now we've ratios, we can remove useless signatures
            res_prod[chan] = {s: n for s, n in data.items()
                              if s in signatures}

    logger.info('Get crash numbers for {}-{}: finished.'.format(
        products, channels))

    return res, ratios
def get(channel, date, product='Firefox', duration=11, tc_limit=50,
        crash_type='all', startup=False):
    """Get crashes info

    Args:
        channel (str): the channel
        date (str): the final date
        product (Optional[str]): the product
        duration (Optional[int]): the duration to retrieve the data
        tc_limit (Optional[int]): the number of topcrashes to load
        crash_type (Optional[str]): 'all' (default) or 'browser' or 'content'
            or 'plugin'
        startup (Optional[bool]): if True, restrict the search to startup
            crashes

    Returns:
        dict: contains all the info relative to the crashes
    """
    channel = channel.lower()
    version = v[channel]
    sys.stdout.write('Getting version information from Socorro...')
    sys.stdout.flush()
    versions = versions_util.get_channel_versions(channel, product)
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # normalize a single crash type string into a list
    if crash_type and isinstance(crash_type, six.string_types):
        crash_type = [crash_type]

    # the searched window is [date - (duration - 1) days, date]
    _date = utils.get_date_ymd(date)
    start_date = utils.get_date_str(_date - timedelta(duration - 1))
    end_date = utils.get_date_str(_date)

    signatures = {}

    def signature_handler(json):
        # For each top signature, collect:
        # [total, windows, mac, linux, startup_count, install_cardinality]
        for signature in json['facets']['signature']:
            signatures[signature['term']] = [signature['count'], 0, 0, 0, 0, 0]

            for platform in signature['facets']['platform']:
                if platform['term'] == 'Linux':
                    signatures[signature['term']][3] = platform['count']
                elif platform['term'] == 'Windows NT':
                    signatures[signature['term']][1] = platform['count']
                elif platform['term'] == 'Mac OS X':
                    signatures[signature['term']][2] = platform['count']

            # startup_crash facet terms '1'/'T' both mean "true"
            for startup_crash in signature['facets']['startup_crash']:
                if startup_crash['term'] in ['1', 'T']:
                    signatures[signature['term']][4] += startup_crash['count']

            signatures[signature['term']][5] = signature['facets'][
                'cardinality_install_time']['value']

    params = {
        'product': product,
        'version': versions,
        'date': socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel': channel,
        '_aggs.signature': ['platform', '_cardinality.install_time',
                            'startup_crash'],
        '_results_number': 0,
        '_facets_size': tc_limit,
    }

    if startup:
        params['startup_crash'] = True

    sys.stdout.write('Getting top signatures from Socorro...')
    sys.stdout.flush()
    socorro.SuperSearch(params=params, handler=signature_handler).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # tracking flag for the current version plus status flags for every
    # version from the current one up to nightly
    bug_flags = [
        'resolution', 'id', 'last_change_time',
        'cf_tracking_firefox' + str(version)
    ]
    for i in range(int(version), int(v['nightly']) + 1):
        bug_flags.append('cf_status_firefox' + str(i))

    # TODO: too many requests... should be improved with chunks
    bugs = {}
    # TODO: Use regexp, when the Bugzilla bug that prevents them from working will be fixed.
    # Four substring variants cover the optional spaces inside '[@ sig ]'.
    base = {
        'j_top': 'OR',
        'o1': 'substring',
        'f1': 'cf_crash_signature',
        'v1': None,
        'o2': 'substring',
        'f2': 'cf_crash_signature',
        'v2': None,
        'o3': 'substring',
        'f3': 'cf_crash_signature',
        'v3': None,
        'o4': 'substring',
        'f4': 'cf_crash_signature',
        'v4': None,
        'include_fields': bug_flags
    }

    queries = []
    for sgn in signatures.keys():
        cparams = base.copy()
        cparams['v1'] = '[@' + sgn + ']'
        cparams['v2'] = '[@ ' + sgn + ' ]'
        cparams['v3'] = '[@ ' + sgn + ']'
        cparams['v4'] = '[@' + sgn + ' ]'
        bugs[sgn] = []
        queries.append(
            Query(Bugzilla.API_URL, cparams, __bug_handler, bugs[sgn]))
    # queries are launched asynchronously; results are awaited further below
    res_bugs = Bugzilla(queries=queries)

    # we have stats by signature in self.signatures
    # for each signature get the number of crashes on the last X days
    # so get the signature trend
    trends = {}
    default_trend = {}
    for i in range(duration):
        default_trend[_date - timedelta(i)] = 0

    base = {
        'product': product,
        'version': versions,
        'signature': None,
        'date': socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel': channel,
        '_results_number': 0,
        '_histogram.date': ['signature'],
        '_histogram_interval': 1
    }

    # batch the signatures: at most 10 per chunk, and flush a group early if
    # its accumulated length reaches 1000 chars (URL length limit)
    queries = []
    for sgns in Connection.chunks(
            list(map(lambda sgn: '=' + sgn, signatures.keys())), 10):
        sgn_group = []
        for sgn in sgns:
            if sum(len(s) for s in sgn_group) >= 1000:
                cparams = base.copy()
                cparams['signature'] = sgn_group
                queries.append(
                    Query(socorro.SuperSearch.URL, cparams,
                          functools.partial(__trend_handler, default_trend),
                          trends))
                sgn_group = []

            sgn_group.append(sgn)

        if len(sgn_group) > 0:
            cparams = base.copy()
            cparams['signature'] = sgn_group
            queries.append(
                Query(socorro.SuperSearch.URL, cparams,
                      functools.partial(__trend_handler, default_trend),
                      trends))

    sys.stdout.write('Getting trends for top signatures from Socorro...')
    sys.stdout.flush()
    socorro.SuperSearch(queries=queries).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # attach the trend: signatures[sgn] becomes a 2-uple
    # (stats list, [counts from most recent to oldest day]).
    # NOTE(review): signatures without a trends entry keep their plain list
    # and would break the stats[0][...] indexing below — presumably every
    # queried signature gets a trend; confirm against __trend_handler.
    for sgn, trend in trends.items():
        signatures[sgn] = (signatures[sgn], [
            trend[key] for key in sorted(trend.keys(), reverse=True)
        ])

    _signatures = {}
    # order self.signatures by crash count
    sorted_signatures = sorted(signatures.items(),
                               key=lambda x: x[1][0][0],
                               reverse=True)
    i = 1
    for s in sorted_signatures:
        _signatures[s[0]] = i  # top crash rank
        i += 1

    sys.stdout.write(
        'Getting bugs linked to the top signatures from Bugzilla...')
    sys.stdout.flush()
    # poll the async Bugzilla queries, printing a dot on each timeout;
    # NOTE(review): assumes r.result raises (a compatible) TimeoutError —
    # confirm which TimeoutError the futures implementation raises
    while True:
        try:
            for r in res_bugs.results:
                r.result(timeout=2)
            break
        except TimeoutError:
            sys.stdout.write('.')
            sys.stdout.flush()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # TODO: In the first query to get the bugs, also get dupe_of and avoid the first query
    # in follow_dup (so modify follow_dup to accept both a bug ID or a bug object).
    queries = []
    for sgn in signatures.keys():
        duplicate_ids = [
            bug['id'] for bug in bugs[sgn] if bug['resolution'] == 'DUPLICATE'
        ]

        # Remove bugs resolved as DUPLICATE from the list of bugs associated to the signature.
        bugs[sgn] = [
            bug for bug in bugs[sgn] if bug['id'] not in duplicate_ids
        ]

        # Find duplicates for bugs resolved as DUPLICATE.
        duplicates = {
            k: v
            for k, v in Bugzilla.follow_dup(duplicate_ids).items()
            if v is not None
        }
        # only fetch targets we don't already have for this signature
        duplicate_targets = [
            bug_id for bug_id in duplicates.values()
            if int(bug_id) not in [bug['id'] for bug in bugs[sgn]]
        ]
        if len(duplicate_targets) == 0:
            continue

        # Get info about bugs that the DUPLICATE bugs have been duped to.
        params = {
            'id': ','.join(duplicate_targets),
            'include_fields': bug_flags,
        }
        queries.append(
            Query(Bugzilla.API_URL, params, __bug_handler, bugs[sgn]))

    sys.stdout.write(
        'Resolving duplicate bugs to the bugs they\'ve been duplicated to...')
    sys.stdout.flush()
    Bugzilla(queries=queries).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # build the final per-signature records
    for sgn, stats in signatures.items():
        # stats is 2-uple: ([count, win_count, mac_count, linux_count, startup_count], trend)
        startup_percent = float(stats[0][4]) / float(stats[0][0])
        _signatures[sgn] = {
            'tc_rank': _signatures[sgn],
            'crash_count': stats[0][0],
            'estimated_user_count': stats[0][5],
            'startup_percent': startup_percent,
            'bugs': bugs[sgn]
        }

    return {
        'start_date': start_date,
        'end_date': end_date,
        'versions': list(versions),
        'signatures': _signatures,
    }