def get_data(params):
    if os.path.isfile(MySuperSearch.PATH):
        with open(MySuperSearch.PATH, 'r') as In:
            data = json.load(In)
    else:
        data = {}

    for k, v in params.items():
        if isinstance(v, list):
            params[k] = sorted(v)

    params_str = get_params_query(params)
    if params_str not in data:
        hdata = []
        Connection(
            socorro.Socorro.API_URL,
            queries=Query(_SuperSearch.URL, params, MySuperSearch.handler,
                          hdata),
        ).wait()
        data[params_str] = hdata[0]
        dumpjson(MySuperSearch.PATH, data)
        return hdata[0]
    return data[params_str]

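# `get_params_query` is referenced above but not defined in this file. A
# minimal sketch of what it could look like, assuming it only needs to turn
# the params dict into a stable, hashable cache key (the name with the
# `_sketch` suffix is hypothetical, not from the source):
from urllib.parse import urlencode


def get_params_query_sketch(params):
    # Sorting the items (list values were already sorted by the caller)
    # guarantees that the same logical query always yields the same key.
    return urlencode(sorted(params.items()), doseq=True)
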
def get_nick(authors):
    bz = {}

    def user_handler(u):
        real = u['real_name']
        m = NICK_PAT.search(real)
        nick = m.group(1) if m else ''
        name = u['name']
        bz[name] = {'name': name, 'real_name': real, 'nick_name': nick}

    authors = list(authors)
    queries = []
    for chunk in Connection.chunks(authors, 20):
        query = BugzillaUser(user_names=chunk,
                             include_fields=['name', 'real_name'],
                             user_handler=user_handler)
        queries.append(query)
    for q in queries:
        q.wait()

    authors = [bz[a] for a in authors if a in bz]

    return authors

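# NICK_PAT is referenced above but not defined here. A plausible sketch,
# assuming Bugzilla real names embed an IRC-style nick such as
# 'Jane Doe [:jdoe]' and we want to extract 'jdoe' (the pattern below is an
# assumption, not the source's definition):
import re

NICK_PAT_SKETCH = re.compile(r':([\w.-]+)')

assert NICK_PAT_SKETCH.search('Jane Doe [:jdoe]').group(1) == 'jdoe'
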
def get_proto_small(product, signatures, search_date, channel):
    """Get the proto-signatures for signatures with a small number of crashes.

    Since we 'must' aggregate uuid on proto-signatures, to be faster we query
    several signatures at once: that's possible because we know that
    card(proto) <= card(crashes) for a given signature."""
    logger.info('Get proto-signatures (small) for {}-{}: started.'.format(
        product, channel))

    def handler(bid, threshold, json, data):
        if not json['facets']['proto_signature']:
            return
        for facets in json['facets']['proto_signature']:
            _facets = facets['facets']
            sgn = _facets['signature'][0]['term']
            protos = data[sgn]['protos'][bid]
            if len(protos) < threshold:
                proto = facets['term']
                count = facets['count']
                uuid = _facets['uuid'][0]['term']
                protos.append({'proto': proto, 'count': count, 'uuid': uuid})
        for facets in json['facets']['signature']:
            sgn = facets['term']
            count = facets['facets']['cardinality_install_time']['value']
            data[sgn]['installs'][bid] = 1 if count == 0 else count

    limit = config.get_limit_facets()
    threshold = config.get_threshold('protos', product, channel)
    base_params = {
        'product': product,
        'release_channel': utils.get_search_channel(channel),
        'date': search_date,
        'build_id': '',
        'signature': '',
        '_aggs.proto_signature': ['uuid', 'signature'],
        '_aggs.signature': '_cardinality.install_time',
        '_results_number': 0,
        '_facets': 'release_channel',
        '_facets_size': limit
    }

    sgns_by_bids = utils.get_sgns_by_bids(signatures)

    for bid, all_signatures in sgns_by_bids.items():
        params = copy.deepcopy(base_params)
        params['build_id'] = utils.get_buildid(bid)
        queries = []
        hdler = functools.partial(handler, bid, threshold)

        for sgns in Connection.chunks(all_signatures, 10):
            params = copy.deepcopy(params)
            params['signature'] = ['=' + s for s in sgns]
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=params,
                      handler=hdler,
                      handlerdata=signatures))
        socorro.SuperSearch(queries=queries).wait()

    logger.info('Get proto-signatures (small) for {}-{}: finished.'.format(
        product, channel))

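# get_proto_small fills `signatures` in place. A sketch of the shape it
# expects for each entry (illustrative only; the real per-build keys come
# from utils.get_build_date and are shown here as a plain string):
example_entry = {
    'OOM | small': {
        # per-build list of {'proto', 'count', 'uuid'} dicts, capped at
        # `threshold` items by the handler
        'protos': {'20161014': []},
        # per-build install counts from the _cardinality.install_time facet
        'installs': {'20161014': 0},
    },
}
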
def get_stats_for_past_weeks(product, channel, start_date_by_channel,
                             versions_by_channel, analysis,
                             search_start_date, end_date, check_for_fx=True):
    queries = []
    trends = {}
    signatures_by_chan = {}
    default_trend_by_chan = {}
    ref_monday, _ = utils.get_monday_sunday(utils.get_date_ymd(end_date))

    def get_past_week(date):
        monday, _ = utils.get_monday_sunday(date)
        return (ref_monday - monday).days // 7

    for chan in channel:
        past_w = get_past_week(start_date_by_channel[chan])
        default_trend_by_chan[chan] = {i: 0 for i in range(past_w + 1)}

    for signature, info in analysis.items():
        if not check_for_fx or info['firefox']:
            data = {}
            trends[signature] = data
            # for chan, volume in info['affected']:
            for chan in channel:
                if chan in signatures_by_chan:
                    signatures_by_chan[chan].append(signature)
                else:
                    signatures_by_chan[chan] = [signature]
                data[chan] = default_trend_by_chan[chan].copy()

    def handler_ss(chan, json, data):
        sgns = []
        for facets in json['facets']['histogram_date']:
            d = utils.get_date_ymd(facets['term'])
            w = get_past_week(d)
            s = facets['facets']['signature']
            for signature in s:
                count = signature['count']
                sgn = signature['term']
                sgns.append(sgn)
                data[sgn][chan][w] += count

    for chan, signatures in signatures_by_chan.items():
        if search_start_date:
            search_date = socorro.SuperSearch.get_search_date(
                search_start_date, end_date)
        else:
            search_date = socorro.SuperSearch.get_search_date(
                utils.get_date_str(start_date_by_channel[chan]), end_date)

        vers = versions_by_channel[chan]
        for sgns in Connection.chunks(signatures, 10):
            queries.append(
                Query(socorro.SuperSearch.URL, {
                    'signature': ['=' + s for s in sgns],
                    'product': product,
                    'version': vers,
                    'release_channel': chan,
                    'date': search_date,
                    '_histogram.date': 'signature',
                    '_results_number': 0
                },
                      handler=functools.partial(handler_ss, chan),
                      handlerdata=trends))
    socorro.SuperSearch(queries=queries).wait()

    return trends

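# A standalone check of the week-bucket arithmetic in get_past_week: the
# number of whole weeks between the Monday of end_date and the Monday of a
# given date. `monday_of` is a hypothetical stand-in for the Monday part of
# utils.get_monday_sunday.
from datetime import date, timedelta


def monday_of(d):
    return d - timedelta(days=d.weekday())


ref_monday_example = monday_of(date(2016, 10, 19))  # Monday 2016-10-17
assert (ref_monday_example - monday_of(date(2016, 10, 5))).days // 7 == 2
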
def filter_nightly_buildids(buildids):
    def handler(threshold, json, data):
        if not json['facets']['build_id']:
            return
        for facets in json['facets']['build_id']:
            count = facets['count']
            if count >= threshold:
                bid = utils.get_build_date(facets['term'])
                data[bid] = True

    params = {
        'product': '',
        'build_id': '',
        'date': '',
        'release_channel': 'nightly',
        '_aggs.build_id': 'release_channel',
        '_results_number': 0,
        '_facets': 'release_channel',
        '_facets_size': 1000
    }

    data = {'Firefox': None, 'FennecAndroid': None}
    queries = []

    for prod in data.keys():
        pparams = copy.deepcopy(params)
        pparams['product'] = prod
        threshold = config.get_min_total(prod, 'nightly')
        data[prod] = D = {}
        for bids in Connection.chunks(buildids[prod]['nightly'],
                                      chunk_size=128):
            pparams = copy.deepcopy(pparams)
            pparams['date'] = '>=' + bids[0][0].strftime('%Y-%m-%d')
            pparams['build_id'] = L = []
            for b in bids:
                L.append(utils.get_buildid(b[0]))
                D[b[0]] = False
            hdler = functools.partial(handler, threshold)
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=pparams,
                      handler=hdler,
                      handlerdata=D))

    socorro.SuperSearch(queries=queries).wait()

    for prod, info in data.items():
        bids = buildids[prod]['nightly']
        L = [(bid, info[bid[0]]) for bid in bids]
        for i in range(len(L) - 1, -1, -1):
            if not L[i][1]:
                L[i] = (L[i][0], True)
            else:
                break
        buildids[prod]['nightly'] = [x[0] for x in L if x[1]]

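# The backwards loop at the end of filter_nightly_buildids reads as: keep the
# most recent build ids even when they are still below the crash threshold
# (their data may simply not have arrived yet), then drop older low-volume
# builds. A tiny illustration with fake (buildid, above_threshold) pairs:
L = [('b1', True), ('b2', False), ('b3', True), ('b4', False), ('b5', False)]
for i in range(len(L) - 1, -1, -1):
    if not L[i][1]:
        L[i] = (L[i][0], True)  # trailing build: keep despite low counts
    else:
        break  # stop at the newest build with enough crashes
assert [x[0] for x in L if x[1]] == ['b1', 'b3', 'b4', 'b5']
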
def get_uuids_fennec(signatures, search_date, channel):
    """Get the uuids for Fennec java crashes"""
    logger.info("Get uuids for Fennec-{}: started.".format(channel))

    def handler(json, data):
        if json["errors"] or not json["facets"]["signature"]:
            return
        bid = json["facets"]["build_id"][0]["term"]
        bid = utils.get_build_date(bid)
        for facets in json["facets"]["signature"]:
            sgn = facets["term"]
            count = facets["count"]
            facets = facets["facets"]
            uuid = facets["uuid"][0]["term"]
            protos = data[sgn]["protos"][bid]
            if not protos:
                protos.append({"proto": "", "count": count, "uuid": uuid})

    base_params = {
        "product": "Fennec",
        "release_channel": utils.get_search_channel(channel),
        "date": search_date,
        "build_id": "",
        "signature": "",
        "_aggs.signature": "uuid",
        "_results_number": 0,
        "_facets": "build_id",
        "_facets_size": 100,
    }

    queries = []
    sgns_by_bids = utils.get_sgns_by_bids(signatures)
    for bid, all_signatures in sgns_by_bids.items():
        params = copy.deepcopy(base_params)
        params["build_id"] = utils.get_buildid(bid)
        for sgns in Connection.chunks(all_signatures, 10):
            params = copy.deepcopy(params)
            params["signature"] = ["=" + s for s in sgns]
            queries.append(
                Query(
                    socorro.SuperSearch.URL,
                    params=params,
                    handler=handler,
                    handlerdata=signatures,
                ))
    socorro.SuperSearch(queries=queries).wait()

    logger.info("Get uuids for Fennec-{}: finished.".format(channel))

def get_uuids_fennec(signatures, search_date, channel):
    """Get the uuids for FennecAndroid java crashes"""
    logger.info('Get uuids for FennecAndroid-{}: started.'.format(channel))

    def handler(json, data):
        if json['errors'] or not json['facets']['signature']:
            return
        bid = json['facets']['build_id'][0]['term']
        bid = utils.get_build_date(bid)
        for facets in json['facets']['signature']:
            sgn = facets['term']
            count = facets['count']
            facets = facets['facets']
            uuid = facets['uuid'][0]['term']
            protos = data[sgn]['protos'][bid]
            if not protos:
                protos.append({'proto': '', 'count': count, 'uuid': uuid})

    base_params = {
        'product': 'FennecAndroid',
        'release_channel': utils.get_search_channel(channel),
        'date': search_date,
        'build_id': '',
        'signature': '',
        '_aggs.signature': 'uuid',
        '_results_number': 0,
        '_facets': 'build_id',
        '_facets_size': 100
    }

    queries = []
    sgns_by_bids = utils.get_sgns_by_bids(signatures)
    for bid, all_signatures in sgns_by_bids.items():
        params = copy.deepcopy(base_params)
        params['build_id'] = utils.get_buildid(bid)
        for sgns in Connection.chunks(all_signatures, 10):
            params = copy.deepcopy(params)
            params['signature'] = ['=' + s for s in sgns]
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=params,
                      handler=handler,
                      handlerdata=signatures))
    socorro.SuperSearch(queries=queries).wait()

    logger.info('Get uuids for FennecAndroid-{}: finished.'.format(channel))

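# Connection.chunks is used throughout these functions to split long
# signature lists into small batches, one SuperSearch query per batch. A
# minimal sketch of the behavior assumed here (the real implementation lives
# in the connection module):
def chunks_sketch(seq, chunk_size):
    for i in range(0, len(seq), chunk_size):
        yield seq[i:i + chunk_size]


assert list(chunks_sketch(['a', 'b', 'c', 'd', 'e'], 2)) == \
    [['a', 'b'], ['c', 'd'], ['e']]
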
def get_data(params, channel):
    if os.path.isfile(MyRevision.PATH):
        with open(MyRevision.PATH, 'r') as In:
            data = json.load(In)
    else:
        data = {}

    params_str = get_params_query(params)
    if params_str not in data:
        hdata = []
        Connection(
            Mercurial.HG_URL,
            queries=Query(_Revision.get_url(channel), params,
                          MyRevision.handler, hdata),
        ).wait()
        data[params_str] = hdata[0]
        dumpjson(MyRevision.PATH, data)
        return hdata[0]
    return data[params_str]

def get_sgns_info(sgns_by_chan,
                  product='Firefox',
                  date='today',
                  query={},
                  versions=None):
    today = utils.get_date(date)
    tomorrow = utils.get_date(date, -1)
    data = {chan: {} for chan in sgns_by_chan.keys()}

    def handler(json, data):
        if json['errors'] or not json['facets']['signature']:
            return
        for facets in json['facets']['signature']:
            sgn = facets['term']
            count = facets['count']
            platforms = defaultdict(lambda: 0)
            startup = {True: 0, False: 0}
            data[sgn] = {
                'count': count,
                'platforms': platforms,
                'startup': startup
            }
            facets = facets['facets']
            for ppv in facets['platform_pretty_version']:
                platforms[ppv['term']] += ppv['count']
            for sc in facets['startup_crash']:
                term = sc['term']
                startup[term == 'T'] += sc['count']

    base = {
        'product': product,
        'date': ['>=' + today, '<' + tomorrow],
        'release_channel': '',
        'version': '',
        'signature': '',
        '_aggs.signature': ['platform_pretty_version', 'startup_crash'],
        '_results_number': 0,
        '_facets_size': 100
    }
    base.update(query)

    searches = []
    for chan, signatures in sgns_by_chan.items():
        params = copy.copy(base)
        params['release_channel'] = chan
        if versions:
            params['version'] = versions[chan]
        for sgns in Connection.chunks(signatures, 10):
            p = copy.copy(params)
            p['signature'] = ['=' + s for s in sgns]
            searches.append(
                socorro.SuperSearch(params=p,
                                    handler=handler,
                                    handlerdata=data[chan]))

    for s in searches:
        s.wait()

    return data

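# The per-signature tallies in get_sgns_info rely on defaultdict-style
# counting; a self-contained illustration of the same pattern with fake
# facet rows:
from collections import defaultdict

platforms_demo = defaultdict(lambda: 0)
startup_demo = {True: 0, False: 0}
for ppv in [{'term': 'Windows 10', 'count': 3},
            {'term': 'Windows 10', 'count': 2}]:
    platforms_demo[ppv['term']] += ppv['count']
for sc in [{'term': 'T', 'count': 1}, {'term': 'F', 'count': 4}]:
    startup_demo[sc['term'] == 'T'] += sc['count']
assert platforms_demo['Windows 10'] == 5
assert startup_demo == {True: 1, False: 4}
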
def get_signatures(limit, product, versions, channel, search_date, signatures,
                   bug_ids, verbose):
    if limit <= 0:
        count = []
        socorro.SuperSearch(
            params={
                'product': product,
                'version': versions,
                'date': search_date,
                'release_channel': channel,
                '_facets_size': 1,
                '_results_number': 0
            },
            handler=lambda json: count.append(json['total'])).wait()
        limit = count[0]

    __warn('Maximum signatures to collect: %d' % limit, verbose)

    __signatures = {}
    known_platforms = {'Windows NT', 'Windows', 'Mac OS X', 'Linux'}
    known_wtf_platforms = {'0x00000000', ''}
    ignored_signatures = get_ignored_signatures()

    def handler_ss(json, data):
        n = 0
        for bucket in json['facets']['signature']:
            signature = bucket['term']
            if signature in ignored_signatures:
                continue
            n += 1
            if n > limit:
                break
            l1 = []
            l2 = []
            data[signature] = {
                'affected_channels': l1,
                'platforms': l2,
                'selected_bug': None,
                'jsbugmon': 0,
                'bugs': None
            }
            facets = bucket['facets']
            for c in facets['release_channel']:
                l1.append((c['term'], c['count']))
            for p in facets['platform']:
                os = p['term']
                if os and os in known_platforms:
                    if os == 'Windows NT':
                        os = 'Windows'
                    l2.append(os)
                elif os not in known_wtf_platforms:
                    __warn('Unknown os: %s' % os)

    all_versions = []
    for c in channel:
        all_versions += versions[c]

    if signatures or bug_ids:
        if bug_ids:
            _sgns = Bugzilla.get_signatures(bug_ids)
            set_sgns = set(signatures)
            for ss in _sgns.values():
                if ss:
                    set_sgns = set_sgns.union(set(ss))
            signatures = list(set_sgns)
        queries = []
        for sgns in Connection.chunks(signatures, 10):
            queries.append(
                Query(socorro.SuperSearch.URL, {
                    'signature': ['=' + s for s in sgns],
                    'product': product,
                    'version': all_versions,
                    'release_channel': channel,
                    'date': search_date,
                    '_aggs.signature': ['release_channel', 'platform'],
                    '_facets_size': max(limit, 100),
                    '_results_number': 0
                },
                      handler=handler_ss,
                      handlerdata=__signatures))
        socorro.SuperSearch(queries=queries).wait()
    else:
        socorro.SuperSearch(params={
            'product': product,
            'version': all_versions,
            'release_channel': channel,
            'date': search_date,
            '_aggs.signature': ['release_channel', 'platform'],
            '_facets_size': max(limit, 100),
            '_results_number': 0
        },
                            handler=handler_ss,
                            handlerdata=__signatures,
                            timeout=300).wait()

    return __signatures

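# Note on the '=' prefix used when building the 'signature' parameter here
# and elsewhere in this file: in Socorro's SuperSearch API, prefixing a value
# with '=' requests an exact match on the term rather than the default
# substring match:
assert ['=' + s for s in ['OOM | small', 'js::GC']] == \
    ['=OOM | small', '=js::GC']
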
def get(channel, date, product='Firefox', duration=11, tc_limit=50,
        crash_type='all', startup=False):
    """Get crashes info

    Args:
        channel (str): the channel
        date (str): the final date
        product (Optional[str]): the product
        duration (Optional[int]): the duration to retrieve the data
        tc_limit (Optional[int]): the number of topcrashes to load
        crash_type (Optional[str]): 'all' (default) or 'browser' or 'content'
            or 'plugin'
        startup (Optional[bool]): if True, only startup crashes are retrieved

    Returns:
        dict: contains all the info relative to the crashes
    """
    channel = channel.lower()
    version = v[channel]
    sys.stdout.write('Getting version information from Socorro...')
    sys.stdout.flush()
    versions = versions_util.get_channel_versions(channel, product)
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    if crash_type and isinstance(crash_type, six.string_types):
        crash_type = [crash_type]

    _date = utils.get_date_ymd(date)
    start_date = utils.get_date_str(_date - timedelta(duration - 1))
    end_date = utils.get_date_str(_date)

    signatures = {}

    def signature_handler(json):
        for signature in json['facets']['signature']:
            signatures[signature['term']] = [signature['count'], 0, 0, 0, 0, 0]

            for platform in signature['facets']['platform']:
                if platform['term'] == 'Linux':
                    signatures[signature['term']][3] = platform['count']
                elif platform['term'] == 'Windows NT':
                    signatures[signature['term']][1] = platform['count']
                elif platform['term'] == 'Mac OS X':
                    signatures[signature['term']][2] = platform['count']

            for startup_crash in signature['facets']['startup_crash']:
                if startup_crash['term'] in ['1', 'T']:
                    signatures[signature['term']][4] += startup_crash['count']

            signatures[signature['term']][5] = signature['facets'][
                'cardinality_install_time']['value']

    params = {
        'product': product,
        'version': versions,
        'date': socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel': channel,
        '_aggs.signature': ['platform', '_cardinality.install_time',
                            'startup_crash'],
        '_results_number': 0,
        '_facets_size': tc_limit,
    }
    if startup:
        params['startup_crash'] = True

    sys.stdout.write('Getting top signatures from Socorro...')
    sys.stdout.flush()
    socorro.SuperSearch(params=params, handler=signature_handler).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    bug_flags = [
        'resolution', 'id', 'last_change_time',
        'cf_tracking_firefox' + str(version)
    ]
    for i in range(int(version), int(v['nightly']) + 1):
        bug_flags.append('cf_status_firefox' + str(i))

    # TODO: too many requests... should be improved with chunks
    bugs = {}
    # TODO: Use regexps once the Bugzilla bug that prevents them from working
    # is fixed.
    base = {
        'j_top': 'OR',
        'o1': 'substring',
        'f1': 'cf_crash_signature',
        'v1': None,
        'o2': 'substring',
        'f2': 'cf_crash_signature',
        'v2': None,
        'o3': 'substring',
        'f3': 'cf_crash_signature',
        'v3': None,
        'o4': 'substring',
        'f4': 'cf_crash_signature',
        'v4': None,
        'include_fields': bug_flags
    }

    queries = []
    for sgn in signatures.keys():
        cparams = base.copy()
        cparams['v1'] = '[@' + sgn + ']'
        cparams['v2'] = '[@ ' + sgn + ' ]'
        cparams['v3'] = '[@ ' + sgn + ']'
        cparams['v4'] = '[@' + sgn + ' ]'
        bugs[sgn] = []
        queries.append(
            Query(Bugzilla.API_URL, cparams, __bug_handler, bugs[sgn]))
    res_bugs = Bugzilla(queries=queries)

    # we have stats by signature in self.signatures
    # for each signature get the number of crashes on the last X days
    # so get the signature trend
    trends = {}
    default_trend = {}
    for i in range(duration):
        default_trend[_date - timedelta(i)] = 0

    base = {
        'product': product,
        'version': versions,
        'signature': None,
        'date': socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel': channel,
        '_results_number': 0,
        '_histogram.date': ['signature'],
        '_histogram_interval': 1
    }

    queries = []
    for sgns in Connection.chunks(
            list(map(lambda sgn: '=' + sgn, signatures.keys())), 10):
        sgn_group = []
        for sgn in sgns:
            if sum(len(s) for s in sgn_group) >= 1000:
                cparams = base.copy()
                cparams['signature'] = sgn_group
                queries.append(
                    Query(socorro.SuperSearch.URL, cparams,
                          functools.partial(__trend_handler, default_trend),
                          trends))
                sgn_group = []
            sgn_group.append(sgn)

        if len(sgn_group) > 0:
            cparams = base.copy()
            cparams['signature'] = sgn_group
            queries.append(
                Query(socorro.SuperSearch.URL, cparams,
                      functools.partial(__trend_handler, default_trend),
                      trends))

    sys.stdout.write('Getting trends for top signatures from Socorro...')
    sys.stdout.flush()
    socorro.SuperSearch(queries=queries).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    for sgn, trend in trends.items():
        signatures[sgn] = (signatures[sgn], [
            trend[key] for key in sorted(trend.keys(), reverse=True)
        ])

    _signatures = {}
    # order self.signatures by crash count
    sorted_signatures = sorted(signatures.items(),
                               key=lambda x: x[1][0][0],
                               reverse=True)
    i = 1
    for s in sorted_signatures:
        _signatures[s[0]] = i  # top crash rank
        i += 1

    sys.stdout.write(
        'Getting bugs linked to the top signatures from Bugzilla...')
    sys.stdout.flush()
    while True:
        try:
            for r in res_bugs.results:
                r.result(timeout=2)
            break
        except TimeoutError:
            sys.stdout.write('.')
            sys.stdout.flush()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # TODO: In the first query to get the bugs, also get dupe_of and avoid
    # the first query in follow_dup (so modify follow_dup to accept both a
    # bug ID or a bug object).
    queries = []
    for sgn in signatures.keys():
        duplicate_ids = [
            bug['id'] for bug in bugs[sgn]
            if bug['resolution'] == 'DUPLICATE'
        ]

        # Remove bugs resolved as DUPLICATE from the list of bugs associated
        # to the signature.
        bugs[sgn] = [
            bug for bug in bugs[sgn] if bug['id'] not in duplicate_ids
        ]

        # Find duplicates for bugs resolved as DUPLICATE.
        duplicates = {
            k: v
            for k, v in Bugzilla.follow_dup(duplicate_ids).items()
            if v is not None
        }
        duplicate_targets = [
            bug_id for bug_id in duplicates.values()
            if int(bug_id) not in [bug['id'] for bug in bugs[sgn]]
        ]
        if len(duplicate_targets) == 0:
            continue

        # Get info about bugs that the DUPLICATE bugs have been duped to.
        params = {
            'id': ','.join(duplicate_targets),
            'include_fields': bug_flags,
        }
        queries.append(
            Query(Bugzilla.API_URL, params, __bug_handler, bugs[sgn]))

    sys.stdout.write(
        'Resolving duplicate bugs to the bugs they\'ve been duplicated to...')
    sys.stdout.flush()
    Bugzilla(queries=queries).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    for sgn, stats in signatures.items():
        # stats is a 2-tuple: ([count, win_count, mac_count, linux_count,
        # startup_count, install_count], trend)
        startup_percent = float(stats[0][4]) / float(stats[0][0])
        _signatures[sgn] = {
            'tc_rank': _signatures[sgn],
            'crash_count': stats[0][0],
            'estimated_user_count': stats[0][5],
            'startup_percent': startup_percent,
            'bugs': bugs[sgn]
        }

    return {
        'start_date': start_date,
        'end_date': end_date,
        'versions': list(versions),
        'signatures': _signatures,
    }

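# The per-signature summary computed at the end of `get`, illustrated with
# fake stats (stats is ([count, win, mac, linux, startup, installs], trend)):
stats_demo = ([200, 120, 30, 50, 20, 150], [5, 3, 0])
startup_percent_demo = float(stats_demo[0][4]) / float(stats_demo[0][0])
assert startup_percent_demo == 0.1  # 20 startup crashes out of 200
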
def get_proto_small(product, signatures, search_date, channel):
    """Get the proto-signatures for signatures with a small number of crashes.

    Since we 'must' aggregate uuid on proto-signatures, to be faster we query
    several signatures at once: that's possible because we know that
    card(proto) <= card(crashes) for a given signature."""
    logger.info("Get proto-signatures (small) for {}-{}: started.".format(
        product, channel))

    def handler(bid, threshold, json, data):
        if not json["facets"]["proto_signature"]:
            return
        for facets in json["facets"]["proto_signature"]:
            _facets = facets["facets"]
            sgn = _facets["signature"][0]["term"]
            protos = data[sgn]["protos"][bid]
            if len(protos) < threshold:
                proto = facets["term"]
                count = facets["count"]
                uuid = _facets["uuid"][0]["term"]
                protos.append({"proto": proto, "count": count, "uuid": uuid})
        for facets in json["facets"]["signature"]:
            sgn = facets["term"]
            count = facets["facets"]["cardinality_install_time"]["value"]
            data[sgn]["installs"][bid] = 1 if count == 0 else count

    limit = config.get_limit_facets()
    threshold = config.get_threshold("protos", product, channel)
    base_params = {
        "product": product,
        "release_channel": utils.get_search_channel(channel),
        "date": search_date,
        "build_id": "",
        "signature": "",
        "_aggs.proto_signature": ["uuid", "signature"],
        "_aggs.signature": "_cardinality.install_time",
        "_results_number": 0,
        "_facets": "release_channel",
        "_facets_size": limit,
    }

    sgns_by_bids = utils.get_sgns_by_bids(signatures)

    for bid, all_signatures in sgns_by_bids.items():
        params = copy.deepcopy(base_params)
        params["build_id"] = utils.get_buildid(bid)
        queries = []
        hdler = functools.partial(handler, bid, threshold)

        for sgns in Connection.chunks(all_signatures, 5):
            params = copy.deepcopy(params)
            params["signature"] = ["=" + s for s in sgns]
            queries.append(
                Query(
                    socorro.SuperSearch.URL,
                    params=params,
                    handler=hdler,
                    handlerdata=signatures,
                ))
        socorro.SuperSearch(queries=queries).wait()

    logger.info("Get proto-signatures (small) for {}-{}: finished.".format(
        product, channel))

def get_uuids(channel, product='Firefox', date='today', limit=10000,
              max_days=3, threshold=5):
    end_date = utils.get_date_ymd(date)
    start_date = end_date - timedelta(days=max_days + 1)
    search_date = socorro.SuperSearch.get_search_date(start_date, end_date)
    r = range(max_days + 1)
    default_trend = {start_date + timedelta(days=i): 0 for i in r}
    data = defaultdict(lambda: copy.deepcopy(default_trend))

    def handler(json, data):
        if not json['errors']:
            for facets in json['facets']['histogram_date']:
                d = utils.get_date_ymd(facets['term'])
                s = facets['facets']['signature']
                for signature in s:
                    count = signature['count']
                    sgn = signature['term']
                    data[sgn][d] += count

    socorro.SuperSearch(params={
        'product': product,
        'date': search_date,
        'release_channel': channel,
        '_histogram.date': 'signature',
        '_facets_size': limit,
        '_results_number': 1
    },
                        handler=handler,
                        handlerdata=data).wait()

    new_signatures = get_new_signatures(data, threshold=threshold)

    if new_signatures:
        data = {}
        queries = []

        def handler(json, data):
            if not json['errors']:
                for facets in json['facets']['proto_signature']:
                    proto = facets['term']
                    count = facets['count']
                    facets = facets['facets']
                    signature = facets['signature'][0]['term']
                    first_uuid = facets['uuid'][0]['term']
                    data[proto] = {
                        'uuid': first_uuid,
                        'count': count,
                        'signature': signature
                    }

        for sgns in Connection.chunks(new_signatures, 5):
            queries.append(
                Query(socorro.SuperSearch.URL, {
                    'product': product,
                    'date': search_date,
                    'signature': ['=' + s for s in sgns],
                    'release_channel': channel,
                    '_aggs.proto_signature': ['uuid', 'signature'],
                    '_facets_size': 1000,
                    '_results_number': 0
                },
                      handler=handler,
                      handlerdata=data))
        socorro.SuperSearch(queries=queries).wait()

        return data, search_date

    return {}, ''

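# get_new_signatures is referenced above but not defined here. A plausible
# sketch, assuming a "new" signature is one with no crashes on any earlier
# day of the window and at least `threshold` crashes on the last day (this
# mirrors the spike test in get_uuids_for_spiking_signatures below):
def get_new_signatures_sketch(data, threshold=5):
    new = []
    for sgn, by_date in data.items():
        counts = [c for _, c in sorted(by_date.items())]
        if all(c == 0 for c in counts[:-1]) and counts[-1] >= threshold:
            new.append(sgn)
    return new
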
def get_uuids_for_spiking_signatures(channel, cache=None, product='Firefox',
                                     date='today', limit=10000, max_days=3,
                                     threshold=5):
    psttz = pytz.timezone('US/Pacific')
    end_date = utils.get_date_ymd(date)  # 2016-10-18 UTC
    end_date_moz = psttz.localize(
        datetime(end_date.year, end_date.month,
                 end_date.day))  # 2016-10-18 PST
    end_buildid = utils.get_buildid_from_date(
        end_date_moz)  # < 20161018000000
    start_date_moz = end_date_moz - timedelta(
        days=max_days + 1)  # 2016-10-14 PST (max_days == 3)
    start_buildid = utils.get_buildid_from_date(
        start_date_moz)  # >= 20161014000000
    search_buildid = ['>=' + start_buildid, '<' + end_buildid]
    start_date = utils.as_utc(start_date_moz)  # 2016-10-14 07:00:00 UTC
    search_date = '>=' + utils.get_date_str(start_date)
    data = defaultdict(lambda: defaultdict(lambda: 0))
    buildids = {}

    def handler(json, data):
        if not json['errors']:
            for facets in json['facets']['build_id']:
                date = utils.get_date_from_buildid(
                    facets['term']).astimezone(psttz)
                buildids[date] = facets['count']
                for s in facets['facets']['signature']:
                    sgn = s['term']
                    count = s['count']
                    data[sgn][date] += count

    socorro.SuperSearch(params={
        'product': product,
        'date': search_date,
        'build_id': search_buildid,
        'release_channel': channel,
        '_aggs.build_id': 'signature',
        '_facets_size': limit,
        '_results_number': 0
    },
                        handler=handler,
                        handlerdata=data).wait()

    _data = {}
    base = {
        start_date_moz + timedelta(days=i): {'buildids': {}, 'total': 0}
        for i in range(max_days + 1)
    }  # from 2016-10-14 to 2016-10-17 PST
    for sgn, info in data.items():
        d = copy.deepcopy(base)
        _data[sgn] = d
        for bid, count in info.items():
            date = psttz.localize(datetime(bid.year, bid.month, bid.day))
            d[date]['buildids'][bid] = count
            d[date]['total'] += count
    data = _data

    spiking_signatures = []
    for sgn, info in data.items():
        stats2 = [
            i['total'] for _, i in sorted(info.items(), key=lambda p: p[0])
        ]
        if all(i == 0 for i in stats2[:-1]) and stats2[-1] >= threshold:
            spiking_signatures.append(sgn)

    data = None
    if spiking_signatures:
        # sort the signatures to be sure to always have the same order
        # for the test
        spiking_signatures = sorted(spiking_signatures)

        start_buildid = utils.get_buildid_from_date(
            end_date_moz - timedelta(days=1))
        search_buildid = ['>=' + start_buildid, '<' + end_buildid]
        queries = []
        data = defaultdict(lambda: list())

        def handler(json, data):
            if not json['errors']:
                for facets in json['facets']['proto_signature']:
                    proto = facets['term']
                    count = facets['count']
                    facets = facets['facets']
                    sgn = facets['signature'][0]['term']
                    first_uuid = facets['uuid'][0]['term']
                    uuids = {i['term'] for i in facets['uuid']}
                    if cache:
                        i = uuids.intersection(cache['uuids'])
                        uuid = i.pop() if i else first_uuid
                    else:
                        uuid = first_uuid
                    data[sgn].append({
                        'proto': proto,
                        'uuid': uuid,
                        'count': count
                    })

        for sgns in Connection.chunks(spiking_signatures, 5):
            queries.append(
                Query(socorro.SuperSearch.URL, {
                    'product': product,
                    'date': search_date,
                    'build_id': search_buildid,
                    'signature': ['=' + s for s in sgns],
                    'release_channel': channel,
                    '_aggs.proto_signature': ['uuid', 'signature'],
                    '_facets_size': 10000,
                    '_results_number': 0
                },
                      handler=handler,
                      handlerdata=data))
        socorro.SuperSearch(queries=queries).wait()

    return data

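# The buildid window arithmetic above, illustrated: a buildid encodes the
# PST build timestamp as YYYYMMDDhhmmss, so the half-open window
# ['>=20161014000000', '<20161018000000'] covers builds from 2016-10-14
# through 2016-10-17 PST. A sketch of what utils.get_buildid_from_date is
# assumed to do:
from datetime import datetime


def buildid_from_date_sketch(d):
    return d.strftime('%Y%m%d%H%M%S')


assert buildid_from_date_sketch(datetime(2016, 10, 14)) == '20161014000000'
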