def test_revisions(self):
    """Fetch two raw revisions through one multi-param query plus one
    through a single-param query, and check each handler received the
    expected raw patch text."""
    collected = {'first': None, 'second': None}
    single = {'first': None}

    def multi_handler(response):
        # Dispatch on the short node id embedded in the raw patch text.
        if '1584ba8c1b86' in response:
            collected['first'] = response
        elif 'f5578fdc50ef' in response:
            collected['second'] = response

    def single_handler(response):
        single['first'] = response

    url = hgmozilla.RawRevision.get_url('nightly')
    hgmozilla.Revision(queries=[
        Query(url,
              [{'node': 'f5578fdc50ef'}, {'node': '1584ba8c1b86'}],
              multi_handler),
        Query(url, {'node': '1584ba8c1b86'}, single_handler),
    ]).wait()

    self.assertIn('# Node ID 1584ba8c1b86f9c4de5ccda5241cef36e80f042c',
                  collected['first'])
    self.assertIn('# Node ID f5578fdc50ef11b7f12451c88297f327abb0e9da',
                  collected['second'])
    self.assertIn('# Node ID 1584ba8c1b86f9c4de5ccda5241cef36e80f042c',
                  single['first'])
def test_queries(self):
    """Run two bug queries against Bugzilla and validate the fields of
    each returned bug."""
    bugs_by_id = {}

    def bughandler(data):
        bug = data['bugs'][0]
        bugs_by_id[bug['id']] = bug

    queries = [
        Query(bugzilla.Bugzilla.API_URL, {'id': bug_id}, bughandler)
        for bug_id in ('12345', '12346')
    ]
    bugzilla.Bugzilla(queries=queries, bughandler=bughandler).wait()

    expected_summaries = {
        12345: u'[DOGFOOD] Unable to Forward a message received as an Inline page or an attachment',
        12346: u'nsOutputFileStream should buffer the output',
    }
    for bug_id, summary in expected_summaries.items():
        bug = bugs_by_id[bug_id]
        self.assertEqual(bug['id'], bug_id)
        self.assertEqual(bug['resolution'], u'FIXED')
        self.assertEqual(bug['assigned_to'], u'*****@*****.**')
        self.assertEqual(bug['summary'], summary)
def get_data(params):
    """Return the SuperSearch result for `params`, using a JSON file cache.

    On a cache miss the query is run against Socorro and the cache file
    is rewritten; on a hit the cached result is returned directly.

    Args:
        params (dict): SuperSearch query parameters (list values are
            order-insensitive and are canonicalized by sorting).

    Returns:
        The (possibly cached) query result.
    """
    if os.path.isfile(MySuperSearch.PATH):
        with open(MySuperSearch.PATH, 'r') as In:
            data = json.load(In)
    else:
        data = {}

    # Canonicalize list values on a *copy*: the original implementation
    # sorted them in place, mutating the caller's dict as a side effect.
    params = {k: sorted(v) if isinstance(v, list) else v
              for k, v in params.items()}

    params_str = get_params_query(params)
    if params_str not in data:
        hdata = []
        Connection(
            socorro.Socorro.API_URL,
            queries=Query(_SuperSearch.URL, params, MySuperSearch.handler,
                          hdata),
        ).wait()
        data[params_str] = hdata[0]
        dumpjson(MySuperSearch.PATH, data)
        return hdata[0]
    return data[params_str]
def filter_from_hg(self, bzdata, user_info):
    """Get the bugs where an associated revision contains the bug id
    in the description"""

    def handler_rev(bugid, json, data):
        # Keep only non-backout revisions whose description mentions
        # the bug id, and record the (name, mail) of the pusher.
        if bugid not in json["desc"] or utils.is_backout(json):
            return
        if bugid not in data:
            data[bugid] = set()
        m = HG_MAIL.match(json["user"])
        if m:
            data[bugid].add((m.group(1).strip(), m.group(2).strip()))

    url = hgmozilla.Revision.get_url("nightly")
    queries = [
        Query(url, {"node": rev}, functools.partial(handler_rev, bugid),
              self.hgdata)
        for bugid, info in bzdata.items()
        for rev in info["revisions"]
    ]
    if queries:
        hgmozilla.Revision(queries=queries).wait()

    self.set_autofixable(bzdata, user_info)
    return self.hgdata
def get_pushdates(chan_rev):
    """Get the pushdates of the given channel/revision. """
    data = {}
    queries = []
    for chan, revs in chan_rev.items():
        # Every esr* channel shares a single 'esr' bucket.
        bucket = 'esr' if chan.startswith('esr') else chan
        pd = data.setdefault(bucket, [])
        url = hgmozilla.Revision.get_url(chan)
        for rev in revs:
            queries.append(Query(url, {'node': rev}, get_pushdate, pd))

    if not queries:
        return None, data
    return hgmozilla.Revision(queries=queries), data
def get_hg_patches(self, bugs):
    """Fetch the raw patches of the non-backed-out landed revisions and
    accumulate per-bug patch statistics (in place in `bugs`).

    Bugs whose landings were all backed out are removed from `bugs`.
    """
    url = hgmozilla.RawRevision.get_url('nightly')
    queries = []

    def handler(patch, data):
        # Merge the stats of this patch into the bug's info dict:
        # first patch seeds the keys, later ones are summed in.
        info = self.patch_analysis(patch)
        if 'addlines' not in data:
            data.update(info)
        else:
            for k, v in info.items():
                data[k] += v

    for info in bugs.values():
        for rev, i in info['land'].items():
            if not i['backedout']:
                queries.append(Query(url, {'node': rev}, handler, info))

    if queries:
        hgmozilla.Revision(queries=queries).wait()

    # Count the landings that were not backed out; drop bugs with none.
    torm = []
    for bug, info in bugs.items():
        landed_patches = [v['backedout']
                          for v in info['land'].values()].count(False)
        # bug with only backouts
        if landed_patches == 0:
            torm.append(bug)
        else:
            info['landed_patches'] = landed_patches
    # Remove bugs that we don't want to show
    for bug in torm:
        del bugs[bug]
def get_files(info, verbose=False):
    """Get info from different backtraces

    Args:
        info (dict): proto -> uuid

    Returns:
        dict: info about the different backtraces
    """

    def handler(proto, json, data):
        jd = json['json_dump']
        if 'threads' not in jd or 'crashedThread' not in json:
            return
        thread_nb = json['crashedThread']
        if thread_nb is None:
            return
        frames = jd['threads'][thread_nb]['frames']
        # Collect the (deduplicated) source files of the crashing thread.
        data[proto] = {get_file(f['file']) for f in frames if 'file' in f}

    data = {}
    queries = [
        Query(socorro.ProcessedCrash.URL,
              params={'crash_id': value['uuid']},
              handler=functools.partial(handler, proto),
              handlerdata=data)
        for proto, value in info.items()
    ]
    if queries:
        socorro.ProcessedCrash(queries=queries).wait()

    return data
def get_proto_small(product, signatures, search_date, channel):
    """Get the proto-signatures for signature with a small number of
    crashes.

    Since we 'must' aggregate uuid on proto-signatures, to be faster we
    query several signatures: it's possible because we know that
    card(proto) <= card(crashes) for a given signature."""
    logger.info('Get proto-signatures (small) for {}-{}: started.'.format(
        product, channel))

    def handler(bid, threshold, json, data):
        # Collect up to `threshold` proto-signatures (with a sample uuid)
        # per signature for this build id.
        if not json['facets']['proto_signature']:
            return
        for facets in json['facets']['proto_signature']:
            _facets = facets['facets']
            sgn = _facets['signature'][0]['term']
            protos = data[sgn]['protos'][bid]
            if len(protos) < threshold:
                proto = facets['term']
                count = facets['count']
                uuid = _facets['uuid'][0]['term']
                protos.append({'proto': proto, 'count': count, 'uuid': uuid})
        # Record the distinct-install cardinality per signature; a zero
        # is stored as 1 (presumably to avoid zero denominators later --
        # TODO confirm against the consumers).
        for facets in json['facets']['signature']:
            sgn = facets['term']
            count = facets['facets']['cardinality_install_time']['value']
            data[sgn]['installs'][bid] = 1 if count == 0 else count

    limit = config.get_limit_facets()
    threshold = config.get_threshold('protos', product, channel)
    base_params = {
        'product': product,
        'release_channel': utils.get_search_channel(channel),
        'date': search_date,
        'build_id': '',
        'signature': '',
        '_aggs.proto_signature': ['uuid', 'signature'],
        '_aggs.signature': '_cardinality.install_time',
        '_results_number': 0,
        '_facets': 'release_channel',
        '_facets_size': limit
    }

    sgns_by_bids = utils.get_sgns_by_bids(signatures)
    # One batch of queries per build id; signatures are chunked by 10 so
    # a single SuperSearch call aggregates several signatures at once.
    for bid, all_signatures in sgns_by_bids.items():
        params = copy.deepcopy(base_params)
        params['build_id'] = utils.get_buildid(bid)
        queries = []
        hdler = functools.partial(handler, bid, threshold)
        for sgns in Connection.chunks(all_signatures, 10):
            params = copy.deepcopy(params)
            params['signature'] = ['=' + s for s in sgns]
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=params,
                      handler=hdler,
                      handlerdata=signatures))
        socorro.SuperSearch(queries=queries).wait()

    logger.info('Get proto-signatures (small) for {}-{}: finished.'.format(
        product, channel))
def analyze_gfx_critical_errors(signature='', product='Firefox',
                                channel=None, versions=None, start_date=''):
    """Count the crashes matching each known graphics critical error.

    Args:
        signature (str): optional signature filter.
        product (str): product name ('firefox' is normalized to 'Firefox').
        channel (list): channels to query; None or ['all'] selects
            release/beta/nightly (plus esr for Firefox).
        versions (list): versions to query; defaults to the versions of
            the current base releases.
        start_date (str): start date; defaults to one week ago.

    Returns:
        dict: gfx critical error -> total crash count
    """
    # Mutable defaults ([], ['all']) are a classic Python pitfall; use
    # None sentinels and copy so the caller's lists are never aliased.
    channel = ['all'] if channel is None else list(channel)
    versions = [] if versions is None else list(versions)

    if product.lower() == 'firefox':
        product = 'Firefox'

    if not channel or channel[0].lower() == 'all':
        channel = ['release', 'beta', 'nightly']
        if product == 'Firefox':
            channel.append('esr')
    else:
        channel = [c.lower() for c in channel]

    if not versions:
        base_versions = libmozdata.versions.get(base=True)
        versions_by_channel = socorro.ProductVersions.get_info_from_major(
            base_versions, product=product)
        versions = [v2['version']
                    for v1 in versions_by_channel.values()
                    for v2 in v1]

    if not start_date:
        start_date = utils.get_date('today', 7)

    gfx_critical_errors = get_critical_errors()

    count = {}

    def handler(json, gfx_critical_error):
        count[gfx_critical_error] = json['total']

    base_params = {
        'product': product,
        'release_channel': channel,
        'version': versions,
        'date': '>=' + start_date,
        '_results_number': 0,
        '_facets_size': 0,
    }
    if signature:
        base_params['signature'] = signature

    queries = []
    for gfx_critical_error in gfx_critical_errors:
        params = base_params.copy()
        # '~' is the SuperSearch 'contains' operator.
        params['graphics_critical_error'] = '~' + gfx_critical_error
        queries.append(
            Query(socorro.SuperSearch.URL,
                  params=params,
                  handler=handler,
                  handlerdata=gfx_critical_error))
    socorro.SuperSearch(queries=queries).wait()

    return count
def filter_nightly_buildids(buildids):
    """Filter out (in place) the nightly build ids which don't reach the
    per-product crash-count threshold.

    The most recent build ids are always kept even when below the
    threshold (presumably because their crashes may not have arrived
    yet -- TODO confirm).
    """

    def handler(threshold, json, data):
        # Flag a build date once its crash count reaches the threshold.
        if not json['facets']['build_id']:
            return
        for facets in json['facets']['build_id']:
            count = facets['count']
            if count >= threshold:
                bid = utils.get_build_date(facets['term'])
                data[bid] = True

    params = {
        'product': '',
        'build_id': '',
        'date': '',
        'release_channel': 'nightly',
        '_aggs.build_id': 'release_channel',
        '_results_number': 0,
        '_facets': 'release_channel',
        '_facets_size': 1000
    }

    data = {'Firefox': None, 'FennecAndroid': None}
    queries = []
    for prod in data.keys():
        pparams = copy.deepcopy(params)
        pparams['product'] = prod
        threshold = config.get_min_total(prod, 'nightly')
        data[prod] = D = {}
        # Query Socorro in chunks of 128 build ids.
        for bids in Connection.chunks(buildids[prod]['nightly'],
                                      chunk_size=128):
            pparams = copy.deepcopy(pparams)
            pparams['date'] = '>=' + bids[0][0].strftime('%Y-%m-%d')
            pparams['build_id'] = L = []
            for b in bids:
                L.append(utils.get_buildid(b[0]))
                # False until the handler proves there are enough crashes.
                D[b[0]] = False
            hdler = functools.partial(handler, threshold)
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=pparams,
                      handler=hdler,
                      handlerdata=D))
    socorro.SuperSearch(queries=queries).wait()

    for prod, info in data.items():
        bids = buildids[prod]['nightly']
        L = [(bid, info[bid[0]]) for bid in bids]
        # Walk backwards from the newest build id, forcing the trailing
        # below-threshold ones to True (keep them) until we reach a
        # build id which does have enough crashes.
        for i in range(len(L) - 1, -1, -1):
            if not L[i][1]:
                L[i] = (L[i][0], True)
            else:
                break
        buildids[prod]['nightly'] = [x[0] for x in L if x[1]]
def get_sgns_for_doubloons(doubloons, signatures, search_date, base_data):
    """Collect crash/install counts for signatures on 'doubloon' build
    ids, filling `base_data[product][channel]` in place.

    Each signature entry is lazily converted from a list of build dates
    into a dict mapping build date -> [raw count, install count]
    (indexed by the module-level RAW/INSTALLS constants).
    """
    if not doubloons:
        return None

    limit = 50
    # Template for a fresh [RAW, INSTALLS] counter pair.
    nbase = [0, 0]

    def handler(bid, json, data):
        if not json['facets']['signature']:
            return
        for facets in json['facets']['signature']:
            sgn = facets['term']
            _facets = facets['facets']
            nums = data[sgn]
            # First time we see this signature: turn its list of build
            # dates into a dict of per-build counters.
            if isinstance(nums, list):
                data[sgn] = nums = {b: copy.copy(nbase) for b in data[sgn]}
            if bid in nums:
                n = nums[bid]
                n[RAW] = facets['count']
                N = len(_facets['install_time'])
                # Facet list truncated at `limit`: fall back on the
                # install-time cardinality instead.
                if N == limit:
                    N = _facets['cardinality_install_time']['value']
                n[INSTALLS] = N

    base_params = {
        'build_id': '',
        'product': '',
        'release_channel': '',
        'signature': ['=' + s for s in signatures],
        'date': search_date,
        '_aggs.signature': ['install_time', '_cardinality.install_time'],
        '_results_number': 0,
        '_facets': 'product',
        '_facets_size': limit
    }

    queries = []
    # One query per (build id, product/channel/version) combination.
    for bid, pcvs in doubloons.items():
        bparams = copy.deepcopy(base_params)
        bparams['build_id'] = bid
        bid = utils.get_build_date(bid)
        for pcv in pcvs:
            params = copy.deepcopy(bparams)
            prod, chan, ver = pcv
            params['product'] = prod
            params['release_channel'] = chan
            params['version'] = ver
            hdler = functools.partial(handler, bid)
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=params,
                      handler=hdler,
                      handlerdata=base_data[prod][chan]))
    socorro.SuperSearch(queries=queries).wait()
def get_fenix_buildids(channels):
    # We don't have build info for Fenix on buildhub
    # so we get them from crash-stats
    start = lmdutils.get_date_ymd('today') - relativedelta(months=3)
    base_params = {
        'product': ['Fenix'],
        'date': '>=' + start.strftime('%Y-%m-%d'),
        'build_id': '>=' + start.strftime('%Y%m%d000000'),
        '_aggs.build_id': 'version',
        '_results_number': 0,
        '_facets': 'release_channel',
        '_facets_size': 1000,
    }
    data = {}

    def handler(chan, json, data):
        if not json['facets']['build_id']:
            return
        info = {}
        for facets in json['facets']['build_id']:
            bid = facets['term']
            # Pick the version with the most crashes for this build id.
            relevant = max(facets['facets']['version'],
                           key=lambda x: x['count'])
            # Workaround to remove "noise" in release channel.
            # Anyway all this stuff will be removed once we have
            # build data on buildhub.
            if chan != 'release' or relevant['count'] >= 200:
                info[str(bid)] = relevant['term']
        data[chan] = [list(item) for item in sorted(info.items())]

    queries = []
    for chan in channels:
        chan_params = base_params.copy()
        chan_params['release_channel'] = chan
        queries.append(
            Query(socorro.SuperSearch.URL,
                  params=chan_params,
                  handler=functools.partial(handler, chan),
                  handlerdata=data))

    ss = socorro.SuperSearch(queries=queries)
    ss.wait()
    ss.session.close()

    return data
def test_revisions(self):
    """Fetch revision JSON for a multi-node query and a single-node
    query, then verify the expected metadata fields are present."""
    data1 = {'first': {}, 'second': {}}
    data2 = {}

    def handler1(json, data):
        # The 'tip' tag marks the default-head revision.
        target = 'first' if 'tip' in json['tags'] else 'second'
        data[target].update(json)

    def handler2(json, data):
        data.update(json)

    url = hgmozilla.Revision.get_url('nightly')
    hgmozilla.Revision(queries=[
        Query(url, [{'node': 'default'}, {'node': '1584ba8c1b86'}],
              handler1, data1),
        Query(url, {'node': 'default'}, handler2, data2),
    ]).wait()

    expected_fields = ('pushid', 'pushuser', 'pushdate', 'user',
                       'branch', 'date', 'desc', 'node')
    for rev in (data1['first'], data1['second'], data2):
        for field in expected_fields:
            self.assertIn(field, rev)

    self.assertEqual(data1['second']['node'],
                     '1584ba8c1b86f9c4de5ccda5241cef36e80f042c')
    self.assertNotEqual(data1['first']['node'], data1['second']['node'])
    self.assertEqual(data1['first']['node'], data2['node'])
def get_proto_big(product, signatures, search_date, channel):
    """Get proto-signatures for signatures which have a high # of
    crashes (>=500)"""
    logger.info("Get proto-signatures (big) for {}-{}: started.".format(
        product, channel))

    def handler(bid, threshold, json, data):
        # Record the install cardinality (0 stored as 1) and collect up
        # to `threshold` proto-signatures with a sample uuid each.
        if not json["facets"]["proto_signature"]:
            return
        installs = json["facets"]["cardinality_install_time"]["value"]
        data["installs"][bid] = 1 if installs == 0 else installs
        for facets in json["facets"]["proto_signature"]:
            protos = data["protos"][bid]
            if len(protos) < threshold:
                proto = facets["term"]
                count = facets["count"]
                uuid = facets["facets"]["uuid"][0]["term"]
                protos.append({"proto": proto, "count": count, "uuid": uuid})

    threshold = config.get_threshold("protos", product, channel)
    base_params = {
        "product": product,
        "release_channel": utils.get_search_channel(channel),
        "date": search_date,
        "build_id": "",
        "signature": "",
        "_aggs.proto_signature": "uuid",
        "_results_number": 0,
        "_facets": "_cardinality.install_time",
        "_facets_size": threshold,
    }

    sgns_by_bids = utils.get_sgns_by_bids(signatures)
    # Unlike the 'small' variant, each signature gets its own query
    # because high-volume signatures cannot be batched.
    for bid, all_signatures in sgns_by_bids.items():
        params = copy.deepcopy(base_params)
        params["build_id"] = utils.get_buildid(bid)
        queries = []
        hdler = functools.partial(handler, bid, threshold)
        for sgn in all_signatures:
            params = copy.deepcopy(params)
            params["signature"] = "=" + sgn
            queries.append(
                Query(
                    socorro.SuperSearch.URL,
                    params=params,
                    handler=hdler,
                    handlerdata=signatures[sgn],
                ))
        socorro.SuperSearch(queries=queries).wait()

    logger.info("Get proto-signatures (big) for {}-{}: finished.".format(
        product, channel))
def get(product, channel, date='yesterday'):
    """Get per-category crash counts for the given product/channel day.

    Args:
        product (str): product name.
        channel (str): release channel.
        date: the day to compute (default 'yesterday').

    Returns:
        dict: date -> {category -> throttled count, split per process
        type when the report asks for it}
    """
    yesterday = utils.get_date_ymd(date)
    versions, throttle = magutils.get_versions(yesterday, product, channel)

    def handler(rep, catname, json, data):
        if json['errors'] or not json['facets']['histogram_date']:
            return
        for facets in json['facets']['histogram_date']:
            total = facets['count']
            dt = facets['term']
            if rep['process_split']:
                # Sum crashes per process type; whatever the process
                # facets don't account for is attributed to 'browser'.
                # (The original code also bound `pt` to the facet list
                # here, which was dead: the loop variable shadowed it.)
                nonbrowser = 0
                d = defaultdict(int)
                for pt in facets['facets']['process_type']:
                    ty = pt['term']
                    N = pt['count']
                    d[ty] += int(N * throttle)
                    nonbrowser += N
                d['browser'] += int((total - nonbrowser) * throttle)
                data[dt][catname] = dict(d)
            else:
                data[dt][catname] = int(total * throttle)

    queries = []
    data = defaultdict(dict)
    today = yesterday + timedelta(days=1)
    search_date = socorro.SuperSearch.get_search_date(yesterday, today)
    for catname, rep in reports.items():
        if rep['desktoponly'] and product != 'Firefox':
            continue

        params = {
            'product': product,
            'version': versions,
            'date': search_date,
            'release_channel': channel,
            '_histogram.date': 'process_type',
            '_facets_size': 5,
            '_results_number': 0
        }
        params.update(rep['params'])
        queries.append(
            Query(socorro.SuperSearch.URL,
                  params,
                  handler=functools.partial(handler, rep, catname),
                  handlerdata=data))
    socorro.SuperSearch(queries=queries).wait()

    return dict(data)
def get_proto_big(product, signatures, search_date, channel):
    """Get proto-signatures for signatures which have a high # of
    crashes (>=500)"""
    logger.info('Get proto-signatures (big) for {}-{}: started.'.format(
        product, channel))

    def handler(bid, threshold, json, data):
        # Record the install cardinality (0 stored as 1) and collect up
        # to `threshold` proto-signatures with a sample uuid each.
        if not json['facets']['proto_signature']:
            return
        installs = json['facets']['cardinality_install_time']['value']
        data['installs'][bid] = 1 if installs == 0 else installs
        for facets in json['facets']['proto_signature']:
            protos = data['protos'][bid]
            if len(protos) < threshold:
                proto = facets['term']
                count = facets['count']
                uuid = facets['facets']['uuid'][0]['term']
                protos.append({'proto': proto, 'count': count, 'uuid': uuid})

    threshold = config.get_threshold('protos', product, channel)
    base_params = {
        'product': product,
        'release_channel': utils.get_search_channel(channel),
        'date': search_date,
        'build_id': '',
        'signature': '',
        '_aggs.proto_signature': 'uuid',
        '_results_number': 0,
        '_facets': '_cardinality.install_time',
        '_facets_size': threshold
    }

    sgns_by_bids = utils.get_sgns_by_bids(signatures)
    # Unlike the 'small' variant, each signature gets its own query
    # because high-volume signatures cannot be batched.
    for bid, all_signatures in sgns_by_bids.items():
        params = copy.deepcopy(base_params)
        params['build_id'] = utils.get_buildid(bid)
        queries = []
        hdler = functools.partial(handler, bid, threshold)
        for sgn in all_signatures:
            params = copy.deepcopy(params)
            params['signature'] = '=' + sgn
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=params,
                      handler=hdler,
                      handlerdata=signatures[sgn]))
        socorro.SuperSearch(queries=queries).wait()

    logger.info('Get proto-signatures (big) for {}-{}: finished.'.format(
        product, channel))
def get_uuids_fennec(signatures, search_date, channel):
    """Get the uuids for Fennec java crashes"""
    logger.info("Get uuids for Fennec-{}: started.".format(channel))

    def handler(json, data):
        # Store one sample uuid per signature/build id (only when no
        # proto entry exists yet -- java crashes get an empty proto).
        if json["errors"] or not json["facets"]["signature"]:
            return
        bid = json["facets"]["build_id"][0]["term"]
        bid = utils.get_build_date(bid)
        for facets in json["facets"]["signature"]:
            sgn = facets["term"]
            count = facets["count"]
            facets = facets["facets"]
            uuid = facets["uuid"][0]["term"]
            protos = data[sgn]["protos"][bid]
            if not protos:
                protos.append({"proto": "", "count": count, "uuid": uuid})

    base_params = {
        "product": "Fennec",
        "release_channel": utils.get_search_channel(channel),
        "date": search_date,
        "build_id": "",
        "signature": "",
        "_aggs.signature": "uuid",
        "_results_number": 0,
        "_facets": "build_id",
        "_facets_size": 100,
    }

    queries = []
    sgns_by_bids = utils.get_sgns_by_bids(signatures)
    # One query per build id, signatures batched by 10.
    for bid, all_signatures in sgns_by_bids.items():
        params = copy.deepcopy(base_params)
        params["build_id"] = utils.get_buildid(bid)
        for sgns in Connection.chunks(all_signatures, 10):
            params = copy.deepcopy(params)
            params["signature"] = ["=" + s for s in sgns]
            queries.append(
                Query(
                    socorro.SuperSearch.URL,
                    params=params,
                    handler=handler,
                    handlerdata=signatures,
                ))
    socorro.SuperSearch(queries=queries).wait()

    logger.info("Get uuids for Fennec-{}: finished.".format(channel))
def get_sgns_data_helper(data, platforms, signatures, bids_chan, extra,
                         search_date, product, channel=None):
    """Get the data from Socorro and collect the stats. """
    limit = 80
    base_params = {
        'build_id': [utils.get_buildid(bid) for bid in bids_chan.keys()],
        'date': search_date,
        'product': product,
        '_aggs.build_id': [
            'install_time',
            '_cardinality.install_time',
            'startup_crash',
            'platform_pretty_version',
        ],
        '_results_number': 0,
        '_facets': 'signature',
        '_facets_size': limit,
    }
    if channel:
        # A 'beta' request also matches the 'aurora' channel.
        if channel == 'beta':
            base_params['release_channel'] = ['beta', 'aurora']
        else:
            base_params['release_channel'] = channel
    utils.update_params(base_params, extra)

    queries = []
    for signature in signatures:
        params = base_params.copy()
        params['signature'] = '=' + signature
        handler = functools.partial(filter_signatures_data, limit, product,
                                    platforms, signature, bids_chan)
        queries.append(
            Query(socorro.SuperSearch.URL,
                  params=params,
                  handler=handler,
                  handlerdata=data))

    return socorro.SuperSearch(queries=queries)
def test_args(self):
    """Check the string representation of dict and multi-dict queries."""
    # Param ordering is not guaranteed, so accept either rendering.
    representation = str(
        Query('https://www.mozilla.org/', {
            'var1': True,
            'var2': ['value2', 'value3'],
            'var3': None
        }))
    self.assertIn(
        representation,
        ('url: https://www.mozilla.org/?var1=True&var2=value2&var2=value3',
         'url: https://www.mozilla.org/?var2=value2&var2=value3&var1=True'))

    representation = str(
        Query('https://www.mozilla.org/',
              [{'var1': True}, {'var2': 'marco'}]))
    self.assertEqual(
        representation,
        'url: https://www.mozilla.org/?var1=True\nurl: https://www.mozilla.org/?var2=marco'
    )
def get_uuids_fennec(signatures, search_date, channel):
    """Get the uuids for FennecAndroid java crashes"""
    logger.info('Get uuids for FennecAndroid-{}: started.'.format(channel))

    def handler(json, data):
        # Store one sample uuid per signature/build id (only when no
        # proto entry exists yet -- java crashes get an empty proto).
        if json['errors'] or not json['facets']['signature']:
            return
        bid = json['facets']['build_id'][0]['term']
        bid = utils.get_build_date(bid)
        for facets in json['facets']['signature']:
            sgn = facets['term']
            count = facets['count']
            facets = facets['facets']
            uuid = facets['uuid'][0]['term']
            protos = data[sgn]['protos'][bid]
            if not protos:
                protos.append({'proto': '', 'count': count, 'uuid': uuid})

    base_params = {
        'product': 'FennecAndroid',
        'release_channel': utils.get_search_channel(channel),
        'date': search_date,
        'build_id': '',
        'signature': '',
        '_aggs.signature': 'uuid',
        '_results_number': 0,
        '_facets': 'build_id',
        '_facets_size': 100
    }

    queries = []
    sgns_by_bids = utils.get_sgns_by_bids(signatures)
    # One query per build id, signatures batched by 10.
    for bid, all_signatures in sgns_by_bids.items():
        params = copy.deepcopy(base_params)
        params['build_id'] = utils.get_buildid(bid)
        for sgns in Connection.chunks(all_signatures, 10):
            params = copy.deepcopy(params)
            params['signature'] = ['=' + s for s in sgns]
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=params,
                      handler=handler,
                      handlerdata=signatures))
    socorro.SuperSearch(queries=queries).wait()

    logger.info('Get uuids for FennecAndroid-{}: finished.'.format(channel))
def get_data(params, channel):
    """Return the revision data for `params`, using a JSON file cache
    and querying Mercurial on a cache miss."""
    data = {}
    if os.path.isfile(MyRevision.PATH):
        with open(MyRevision.PATH, 'r') as cache_file:
            data = json.load(cache_file)

    key = get_params_query(params)
    if key in data:
        return data[key]

    # Cache miss: fetch from hg and persist the updated cache.
    hdata = []
    Connection(
        Mercurial.HG_URL,
        queries=Query(_Revision.get_url(channel), params, MyRevision.handler,
                      hdata),
    ).wait()
    data[key] = hdata[0]
    dumpjson(MyRevision.PATH, data)
    return hdata[0]
def get_hg(self, bugs):
    """Check the landed revisions against their bugs and return the set
    of bug ids for which nothing actually landed."""

    def handler_rev(json, data):
        # A revision counts as landed when it references the right bug
        # and was not backed out.
        info = utils.get_info_from_hg(json)
        if info["bugid"] == data["bugid"] and not info["backedout"]:
            data["ok"] = True

    url = hgmozilla.Revision.get_url(self.channel)
    queries = [
        Query(url, {"node": rev}, handler_rev, i)
        for info in bugs.values()
        for rev, i in info.get("land", {}).items()
    ]
    if queries:
        hgmozilla.Revision(queries=queries).wait()

    return {
        bugid
        for bugid, info in bugs.items()
        if all(not i["ok"] for i in info.get("land", {}).values())
    }
def get_signatures():
    """Collect recent top signatures from Socorro for each product."""

    def handler(json, data):
        for facet in json['facets']['signature']:
            data.append(facet['term'])

    # The reference date is cached on disk so reruns use the same window.
    if not os.path.exists('tests/data/socorro/test_date.json'):
        date = lmdutils.get_date_ymd('today')
        dumpjson('tests/data/socorro/test_date.json',
                 date.strftime('%Y-%m-%d'))
    else:
        with open('tests/data/socorro/test_date.json', 'r') as In:
            date = lmdutils.get_date_ymd(json.load(In))

    search_date = _SuperSearch.get_search_date(date - relativedelta(days=3))
    base_params = {
        'date': search_date,
        '_results_number': 0,
        '_facets': 'signature',
        '_facets_size': 100,
    }

    data = {'Firefox': [], 'FennecAndroid': []}
    queries = []
    for product, sgns in data.items():
        queries.append(
            Query(
                MySuperSearch.URL,
                params=dict(base_params, product=product),
                handler=handler,
                handlerdata=sgns,
            )
        )
    MySuperSearch(queries=queries).wait()

    return data
def get_hg(self, bugs):
    """Annotate the landed revisions with pushdate/backout/bug-id info,
    then prune (in place) the revisions pushed outside
    [self.date, self.tomorrow) or whose description doesn't reference
    the bug, and drop the bugs left without any revision.
    """
    url = hgmozilla.Revision.get_url('nightly')
    queries = []

    def handler_rev(json, data):
        # pushdate[0] is a unix timestamp; normalize to a UTC date string.
        push = json['pushdate'][0]
        push = datetime.datetime.utcfromtimestamp(push)
        push = lmdutils.as_utc(push)
        data['date'] = lmdutils.get_date_str(push)
        data['backedout'] = utils.is_backout(json)
        m = BUG_PAT.search(json['desc'])
        # Clear the bug id when the description doesn't reference it;
        # the revision will be removed in the cleaning pass below.
        if not m or m.group(1) != data['bugid']:
            data['bugid'] = ''

    for info in bugs.values():
        for rev, i in info['land'].items():
            queries.append(Query(url, {'node': rev}, handler_rev, i))

    if queries:
        hgmozilla.Revision(queries=queries).wait()

    # clean: drop mismatched/out-of-window revisions, then empty bugs.
    bug_torm = []
    for bug, info in bugs.items():
        torm = []
        for rev, i in info['land'].items():
            if not i['bugid'] or not (self.date <= lmdutils.get_date_ymd(
                    i['date']) < self.tomorrow):
                torm.append(rev)
        for x in torm:
            del info['land'][x]
        if not info['land']:
            bug_torm.append(bug)
    for x in bug_torm:
        del bugs[x]

    self.get_hg_patches(bugs)
def get_bt(info, cache=None, verbose=False):
    """Get info from different backtraces

    Args:
        info (dict): signature -> uuids
        cache (dict): optional cache with 'uuids' (already-fetched uuids)
            and 'bt_info' (previously collected backtraces per signature)
        verbose (bool): print new/old uuid notices

    Returns:
        dict: info about the different backtraces
    """

    def handler(json, data):
        # Group crashes by the function tuple of the crashing thread's
        # frames; keep the matching files/lines for the first occurrence.
        uuid = json['uuid']
        jd = json['json_dump']
        if 'threads' in jd and 'crashedThread' in json:
            thread_nb = json['crashedThread']
            if thread_nb is not None:
                frames = jd['threads'][thread_nb]['frames']
                functions = tuple(frame['function'] for frame in frames
                                  if 'function' in frame)
                files = tuple(
                    frame.get('file', None) for frame in frames
                    if 'function' in frame)
                lines = tuple(
                    frame.get('line', 0) for frame in frames
                    if 'function' in frame)
                if functions in data[0]:
                    data[0][functions]['uuids'].append(uuid)
                    data[0][functions]['count'] += data[1]['count']
                else:
                    data[0][functions] = {
                        'count': data[1]['count'],
                        'uuids': [uuid],
                        'files': files,
                        'lines': lines,
                        'processed': False
                    }

    data = {}
    queries = []
    cached_uuids = cache['uuids'] if cache else set()
    for sgn, pucs in info.items():
        d = {}
        data[sgn] = d
        # Only fetch uuids that are not already in the cache.
        for puc in pucs:
            uuid = puc['uuid']
            if uuid not in cached_uuids:
                __warn('New UUID: %s' % uuid, verbose)
                queries.append(
                    Query(socorro.ProcessedCrash.URL,
                          params={'crash_id': uuid},
                          handler=handler,
                          handlerdata=(d, puc)))
            else:
                __warn('Old UUID: %s' % uuid, verbose)

    if queries:
        socorro.ProcessedCrash(queries=queries).wait()

    # Merge the fresh counts into the cached backtraces and mark the
    # merged entries as processed.
    if cache:
        cached_bt_info = cache['bt_info']
        for sgn, info in data.items():
            if sgn in cached_bt_info:
                for bt in cached_bt_info[sgn].keys():
                    if bt in info:
                        cached_bt_info[sgn][bt]['count'] += info[bt]['count']
                        info[bt]['processed'] = True

    return data
def get_uuids(channel, product='Firefox', date='today', limit=10000,
              max_days=3, threshold=5):
    """Find new signatures over the last `max_days` days and return their
    proto-signatures with a sample uuid each.

    Returns:
        tuple: (proto -> {'uuid', 'count', 'signature'}, search_date),
        or ({}, '') when no new signature is found.
    """
    end_date = utils.get_date_ymd(date)
    start_date = end_date - timedelta(days=max_days + 1)
    search_date = socorro.SuperSearch.get_search_date(start_date, end_date)
    r = range(max_days + 1)
    # Per-signature crash trend: date -> count, seeded with zeros.
    default_trend = {start_date + timedelta(days=i): 0 for i in r}
    data = defaultdict(lambda: copy.deepcopy(default_trend))

    def handler(json, data):
        if not json['errors']:
            for facets in json['facets']['histogram_date']:
                d = utils.get_date_ymd(facets['term'])
                s = facets['facets']['signature']
                for signature in s:
                    count = signature['count']
                    sgn = signature['term']
                    data[sgn][d] += count

    socorro.SuperSearch(params={
        'product': product,
        'date': search_date,
        'release_channel': channel,
        '_histogram.date': 'signature',
        '_facets_size': limit,
        '_results_number': 1
    },
                        handler=handler,
                        handlerdata=data).wait()

    new_signatures = get_new_signatures(data, threshold=threshold)

    if new_signatures:
        data = {}
        queries = []

        # Second pass: fetch the proto-signatures (with a sample uuid)
        # of the new signatures, chunked by 5 per query.
        def handler(json, data):
            if not json['errors']:
                for facets in json['facets']['proto_signature']:
                    proto = facets['term']
                    count = facets['count']
                    facets = facets['facets']
                    signature = facets['signature'][0]['term']
                    first_uuid = facets['uuid'][0]['term']
                    data[proto] = {
                        'uuid': first_uuid,
                        'count': count,
                        'signature': signature
                    }

        for sgns in Connection.chunks(new_signatures, 5):
            queries.append(
                Query(socorro.SuperSearch.URL, {
                    'product': product,
                    'date': search_date,
                    'signature': ['=' + s for s in sgns],
                    'release_channel': channel,
                    '_aggs.proto_signature': ['uuid', 'signature'],
                    '_facets_size': 1000,
                    '_results_number': 0
                },
                      handler=handler,
                      handlerdata=data))
        socorro.SuperSearch(queries=queries).wait()

        return data, search_date

    return {}, ''
def get(channel,
        date,
        product='Firefox',
        duration=11,
        tc_limit=50,
        crash_type='all',
        startup=False):
    """Get crashes info

    Args:
        channel (str): the channel
        date (str): the final date
        product (Optional[str]): the product
        duration (Optional[int]): the duration to retrieve the data
        tc_limit (Optional[int]): the number of topcrashes to load
        crash_type (Optional[str]): 'all' (default) or 'browser' or 'content' or 'plugin'
        startup (Optional[bool]): if True, restrict the search to startup crashes

    Returns:
        dict: contains all the info relative to the crashes
    """
    channel = channel.lower()
    # Version number for this channel (module-level mapping `v`).
    version = v[channel]

    sys.stdout.write('Getting version information from Socorro...')
    sys.stdout.flush()
    versions = versions_util.get_channel_versions(channel, product)
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # Normalize a single crash type string into a list.
    if crash_type and isinstance(crash_type, six.string_types):
        crash_type = [crash_type]

    _date = utils.get_date_ymd(date)
    start_date = utils.get_date_str(_date - timedelta(duration - 1))
    end_date = utils.get_date_str(_date)

    # signature -> [count, win, mac, linux, startup, installs]
    # (see signature_handler for the slot meanings).
    signatures = {}

    def signature_handler(json):
        # Fill, per signature: total count, per-platform counts, startup
        # crash count and the install-time cardinality (estimated users).
        for signature in json['facets']['signature']:
            signatures[signature['term']] = [signature['count'], 0, 0, 0, 0, 0]

            for platform in signature['facets']['platform']:
                if platform['term'] == 'Linux':
                    signatures[signature['term']][3] = platform['count']
                elif platform['term'] == 'Windows NT':
                    signatures[signature['term']][1] = platform['count']
                elif platform['term'] == 'Mac OS X':
                    signatures[signature['term']][2] = platform['count']

            for startup_crash in signature['facets']['startup_crash']:
                if startup_crash['term'] in ['1', 'T']:
                    signatures[signature['term']][4] += startup_crash['count']

            signatures[signature['term']][5] = signature['facets'][
                'cardinality_install_time']['value']

    params = {
        'product': product,
        'version': versions,
        'date': socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel': channel,
        '_aggs.signature':
        ['platform', '_cardinality.install_time', 'startup_crash'],
        '_results_number': 0,
        '_facets_size': tc_limit,
    }

    if startup:
        params['startup_crash'] = True

    sys.stdout.write('Getting top signatures from Socorro...')
    sys.stdout.flush()
    socorro.SuperSearch(params=params, handler=signature_handler).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # Bugzilla fields to fetch: one status flag per version from this
    # channel's version up to nightly.
    bug_flags = [
        'resolution', 'id', 'last_change_time',
        'cf_tracking_firefox' + str(version)
    ]
    for i in range(int(version), int(v['nightly']) + 1):
        bug_flags.append('cf_status_firefox' + str(i))

    # TODO: too many requests... should be improved with chunks
    bugs = {}
    # TODO: Use regexp, when the Bugzilla bug that prevents them from working will be fixed.
    base = {
        'j_top': 'OR',
        'o1': 'substring',
        'f1': 'cf_crash_signature',
        'v1': None,
        'o2': 'substring',
        'f2': 'cf_crash_signature',
        'v2': None,
        'o3': 'substring',
        'f3': 'cf_crash_signature',
        'v3': None,
        'o4': 'substring',
        'f4': 'cf_crash_signature',
        'v4': None,
        'include_fields': bug_flags
    }

    queries = []
    for sgn in signatures.keys():
        # The four substring variants cover the possible spacings of
        # '[@ signature ]' inside the cf_crash_signature field.
        cparams = base.copy()
        cparams['v1'] = '[@' + sgn + ']'
        cparams['v2'] = '[@ ' + sgn + ' ]'
        cparams['v3'] = '[@ ' + sgn + ']'
        cparams['v4'] = '[@' + sgn + ' ]'
        bugs[sgn] = []
        queries.append(
            Query(Bugzilla.API_URL, cparams, __bug_handler, bugs[sgn]))
    # Fired asynchronously; the futures are awaited further below while the
    # trend queries run.
    res_bugs = Bugzilla(queries=queries)

    # we have stats by signature in self.signatures
    # for each signature get the number of crashes on the last X days
    # so get the signature trend
    trends = {}
    default_trend = {}
    for i in range(duration):
        default_trend[_date - timedelta(i)] = 0

    base = {
        'product': product,
        'version': versions,
        'signature': None,
        'date': socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel': channel,
        '_results_number': 0,
        '_histogram.date': ['signature'],
        '_histogram_interval': 1
    }

    queries = []
    for sgns in Connection.chunks(
            list(map(lambda sgn: '=' + sgn, signatures.keys())), 10):
        sgn_group = []
        for sgn in sgns:
            # Flush the group before it exceeds ~1000 characters so each
            # query URL stays within reasonable length limits.
            if sum(len(s) for s in sgn_group) >= 1000:
                cparams = base.copy()
                cparams['signature'] = sgn_group
                queries.append(
                    Query(socorro.SuperSearch.URL, cparams,
                          functools.partial(__trend_handler, default_trend),
                          trends))
                sgn_group = []

            sgn_group.append(sgn)

        if len(sgn_group) > 0:
            cparams = base.copy()
            cparams['signature'] = sgn_group
            queries.append(
                Query(socorro.SuperSearch.URL, cparams,
                      functools.partial(__trend_handler, default_trend),
                      trends))

    sys.stdout.write('Getting trends for top signatures from Socorro...')
    sys.stdout.flush()
    socorro.SuperSearch(queries=queries).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # Attach the per-day trend (most recent day first) to each signature.
    for sgn, trend in trends.items():
        signatures[sgn] = (signatures[sgn], [
            trend[key] for key in sorted(trend.keys(), reverse=True)
        ])

    _signatures = {}
    # order self.signatures by crash count
    sorted_signatures = sorted(signatures.items(),
                               key=lambda x: x[1][0][0],
                               reverse=True)
    i = 1
    for s in sorted_signatures:
        _signatures[s[0]] = i  # top crash rank
        i += 1

    sys.stdout.write(
        'Getting bugs linked to the top signatures from Bugzilla...')
    sys.stdout.flush()
    # Poll the Bugzilla futures, printing a dot on each 2s timeout until
    # they are all resolved.
    while True:
        try:
            for r in res_bugs.results:
                r.result(timeout=2)
            break
        except TimeoutError:
            sys.stdout.write('.')
            sys.stdout.flush()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # TODO: In the first query to get the bugs, also get dupe_of and avoid the first query
    # in follow_dup (so modify follow_dup to accept both a bug ID or a bug object).
    queries = []
    for sgn in signatures.keys():
        duplicate_ids = [
            bug['id'] for bug in bugs[sgn] if bug['resolution'] == 'DUPLICATE'
        ]

        # Remove bugs resolved as DUPLICATE from the list of bugs associated to the signature.
        bugs[sgn] = [
            bug for bug in bugs[sgn] if bug['id'] not in duplicate_ids
        ]

        # Find duplicates for bugs resolved as DUPLICATE.
        duplicates = {
            k: v
            for k, v in Bugzilla.follow_dup(duplicate_ids).items()
            if v is not None
        }
        duplicate_targets = [
            bug_id for bug_id in duplicates.values()
            if int(bug_id) not in [bug['id'] for bug in bugs[sgn]]
        ]
        if len(duplicate_targets) == 0:
            continue

        # Get info about bugs that the DUPLICATE bugs have been duped to.
        params = {
            'id': ','.join(duplicate_targets),
            'include_fields': bug_flags,
        }
        queries.append(
            Query(Bugzilla.API_URL, params, __bug_handler, bugs[sgn]))
    sys.stdout.write(
        'Resolving duplicate bugs to the bugs they\'ve been duplicated to...')
    sys.stdout.flush()
    Bugzilla(queries=queries).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    for sgn, stats in signatures.items():
        # stats is 2-uple: ([count, win_count, mac_count, linux_count, startup_count], trend)
        startup_percent = float(stats[0][4]) / float(stats[0][0])
        _signatures[sgn] = {
            'tc_rank': _signatures[sgn],
            'crash_count': stats[0][0],
            'estimated_user_count': stats[0][5],
            'startup_percent': startup_percent,
            'bugs': bugs[sgn]
        }

    return {
        'start_date': start_date,
        'end_date': end_date,
        'versions': list(versions),
        'signatures': _signatures,
    }
def get_buildids(search_date, channels, products):
    """Get the build ids and their versions per product/channel.

    Queries Socorro SuperSearch for build-id facets, keeps only build ids
    whose crash count reaches the per-product/channel threshold, removes
    duplicate versions and trims each list to the configured size.

    Args:
        search_date: date range string for the 'date' SuperSearch parameter.
        channels (list): the release channels to query.
        products (list): the products to query.

    Returns:
        dict: product -> channel -> list of (build date, version) tuples,
        sorted by build id (most recent last).
    """
    data = {p: {c: list() for c in channels} for p in products}

    def handler(chan, threshold, json, data):
        # Keep build ids with at least `threshold` crashes; on beta, skip
        # versions that end with 'a2' or 'b0'.
        if not json['facets']['build_id']:
            return
        for facets in json['facets']['build_id']:
            count = facets['count']
            if count >= threshold:
                version = facets['facets']['version'][0]['term']
                if chan != 'beta' or not (version.endswith('a2')
                                          or version.endswith('b0')):
                    buildid = facets['term']
                    data.append((buildid, version, count))

    # Base query template; product/release_channel are filled per query.
    base_params = {
        'product': '',
        'release_channel': '',
        'date': search_date,
        '_aggs.build_id': 'version',
        '_results_number': 0,
        '_facets': 'release_channel',
        '_facets_size': 1000
    }

    queries = []
    for prod in products:
        pparams = copy.deepcopy(base_params)
        pparams['product'] = prod
        for chan in channels:
            # BUG FIX: the original rebound the shared template (`params`)
            # here, so later products deep-copied a channel-contaminated
            # dict.  It only worked because the contaminated keys happened
            # to be overwritten; a distinct name removes the fragility.
            cparams = copy.deepcopy(pparams)
            if chan == 'beta' and prod == 'Firefox':
                # Firefox beta crashes may be reported under 'aurora' too.
                cparams['release_channel'] = ['beta', 'aurora']
            else:
                cparams['release_channel'] = chan
            threshold = config.get_min_total(prod, chan)
            hdler = functools.partial(handler, chan, threshold)
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=cparams,
                      handler=hdler,
                      handlerdata=data[prod][chan]))

    socorro.SuperSearch(queries=queries).wait()

    for prod, info in data.items():
        data[prod] = remove_dup_versions(info)

    res = {}
    for prod, info in data.items():
        res[prod] = d = {}
        for chan, bids in info.items():
            bids = sorted(bids)
            # Keep only the most recent N build ids per channel.
            min_v = config.get_versions(prod, chan)
            if len(bids) > min_v:
                bids = bids[-min_v:]
            bids = [(utils.get_build_date(bid), v) for bid, v in bids]
            d[chan] = bids

    logger.info('Buildids for {}/{} got.'.format(products, channels))

    return res
def get_sgns_by_buildid(signatures, channels, products, search_date, bids):
    """Get crash numbers by signature for each build id.

    For every product/channel/build id, queries Socorro SuperSearch for
    signature facets and records raw crash counts and install counts at the
    build id's index in a per-signature copy of the base structure.

    Args:
        signatures: collection of signatures to keep in the result.
        channels (list): the release channels.
        products (list): the products.
        search_date: date range string for the 'date' SuperSearch parameter.
        bids (dict): product -> channel -> list of (build id, version).

    Returns:
        tuple: (product -> channel -> {signature: numbers}, ratios computed
        by tools.get_global_ratios on the unfiltered data).
    """
    base = utils.get_base_list(bids)
    limit = config.get_limit_facets()
    logger.info('Get crash numbers for {}-{}: started.'.format(
        products, channels))

    def handler(base, index, json, data):
        # Record, at position `index`, each signature's raw crash count and
        # its number of distinct installs.
        if not json['facets']['signature']:
            return
        for facets in json['facets']['signature']:
            sgn = facets['term']
            if sgn not in data:
                data[sgn] = copy.deepcopy(base)
            data[sgn][index][RAW] = facets['count']
            facets = facets['facets']
            n = len(facets['install_time'])
            if n == limit:
                # Facet list was truncated: fall back to the exact
                # cardinality aggregation.
                n = facets['cardinality_install_time']['value']
            data[sgn][index][INSTALLS] = n

    # Query template; product/release_channel/build_id are filled per query.
    base_params = {
        'product': '',
        'release_channel': '',
        'build_id': '',
        'date': search_date,
        '_aggs.signature': ['install_time', '_cardinality.install_time'],
        '_results_number': 0,
        '_facets': 'release_channel',
        '_facets_size': limit
    }

    ratios = {}
    res = {}
    for prod in products:
        pparams = copy.deepcopy(base_params)
        pparams['product'] = prod
        base_prod = base[prod]
        bids_prod = bids[prod]
        ratios[prod] = ratios_prod = {}
        res[prod] = res_prod = {}
        for chan in channels:
            # BUG FIX: the original rebound `params` both here and inside
            # the build-id loop, chaining deep copies of an already-modified
            # dict (and clobbering the per-product template name).  It only
            # worked because 'build_id' was overwritten each time; distinct
            # names make the copies start from a clean template.
            cparams = copy.deepcopy(pparams)
            cparams['release_channel'] = chan
            data = {}
            sbids = [b for b, _ in bids_prod[chan]]
            queries = []
            for index, bid in enumerate(sbids):
                qparams = copy.deepcopy(cparams)
                qparams['build_id'] = utils.get_buildid(bid)
                hdler = functools.partial(handler, base_prod[chan], index)
                queries.append(
                    Query(socorro.SuperSearch.URL,
                          params=qparams,
                          handler=hdler,
                          handlerdata=data))
            socorro.SuperSearch(queries=queries).wait()

            ratios_prod[chan] = tools.get_global_ratios(data)

            # now we've ratios, we can remove useless signatures
            res_prod[chan] = {s: n for s, n in data.items() if s in signatures}

    logger.info('Get crash numbers for {}-{}: finished.'.format(
        products, channels))

    return res, ratios
def get_sgns_data(channels, versions, signatures, products, date='today'):
    """Get raw crash and install numbers per signature and build id.

    One SuperSearch query is issued per signature over all known build ids;
    build ids whose facets span several products or channels are recorded
    as doubloons and resolved separately by get_sgns_for_doubloons.

    Args:
        channels (list): the release channels.
        versions: versions passed to get_all_buildids.
        signatures (list): the signatures to query.
        products (list): the products.
        date (str): the final date ('today' by default).

    Returns:
        dict: product -> channel -> signature -> {build date: [raw,
        installs]} (signatures never seen in the results are dropped).
    """
    today = lmdutils.get_date_ymd(date)
    few_days_ago = today - relativedelta(days=config.get_limit())
    search_date = socorro.SuperSearch.get_search_date(few_days_ago)
    # Template for the [RAW, INSTALLS] counters; copied per build id.
    nbase = [0, 0]
    data = {}
    bids, all_bids, all_versions, doubloons = get_all_buildids(versions)

    # Pre-fill every signature slot with the channel's build-id list; the
    # handler replaces it with a per-build dict on first hit.
    for product in products:
        data[product] = d1 = {}
        b1 = bids[product]
        for chan in channels:
            d1[chan] = d2 = {}
            b2 = b1[chan]
            for signature in signatures:
                # NOTE: b2 is deliberately shared between signatures here —
                # the handler rebinds dpc[sgn] rather than mutating it.
                d2[signature] = b2

    limit = 80

    def handler(sgn, json, data):
        if not json['facets']['build_id']:
            return
        for facets in json['facets']['build_id']:
            bid = facets['term']
            prod, chan, ver = all_bids[str(bid)]
            _facets = facets['facets']
            # 'aurora' is folded into 'beta' when counting channels.
            chans = set()
            for c in _facets['release_channel']:
                c = c['term']
                if c == 'aurora':
                    chans.add('beta')
                else:
                    chans.add(c)
            if len(_facets['product']) != 1 or len(chans) != 1:
                # Ambiguous build id (several products/channels): defer to
                # the doubloons pass.
                bid = str(bid)
                doubloons[bid] = [(prod, chan, ver)]
            else:
                dpc = data[prod][chan]
                nums = dpc[sgn]
                bid = utils.get_build_date(bid)
                if isinstance(nums, list):
                    # First result for this signature: turn the shared
                    # build-id list into a per-signature counter dict.
                    # NOTE(review): keys come from the entries of dpc[sgn] —
                    # presumably build dates; confirm against
                    # get_all_buildids.
                    dpc[sgn] = nums = {b: copy.copy(nbase) for b in dpc[sgn]}
                if bid in nums:
                    n = nums[bid]
                    n[RAW] = facets['count']
                    N = len(_facets['install_time'])
                    if N == limit:
                        # Facet list truncated: use the exact cardinality.
                        N = _facets['cardinality_install_time']['value']
                    n[INSTALLS] = N

    base_params = {
        'build_id': list(all_bids.keys()),
        'signature': '',
        'version': all_versions,
        'date': search_date,
        '_aggs.build_id': [
            'install_time', '_cardinality.install_time', 'release_channel',
            'product'
        ],
        '_results_number': 0,
        '_facets': 'signature',
        '_facets_size': limit
    }

    queries = []
    for signature in signatures:
        params = copy.deepcopy(base_params)
        params['signature'] = '=' + signature
        hdler = functools.partial(handler, signature)
        queries.append(
            Query(socorro.SuperSearch.URL,
                  params=params,
                  handler=hdler,
                  handlerdata=data))
    socorro.SuperSearch(queries=queries).wait()

    get_sgns_for_doubloons(doubloons, signatures, search_date, data)

    # Keep only the signatures the handler actually converted to dicts
    # (the untouched list placeholders mean "no crashes seen").
    res = defaultdict(lambda: defaultdict(lambda: dict()))
    for p, i in data.items():
        for c, j in i.items():
            for sgn, numbers in j.items():
                if not isinstance(numbers, list):
                    res[p][c][sgn] = numbers

    return res