Example #1
0
    def get_data(params):
        """Return the SuperSearch result for ``params``, going through a JSON cache.

        The cache is loaded from the JSON file at ``MySuperSearch.PATH`` (an
        empty cache is used when the file does not exist) and is keyed by
        ``get_params_query(params)``.  On a miss, a query is run through a
        ``Connection`` on ``socorro.Socorro.API_URL``; the first element of
        the handler-data list is stored under the key, the cache is passed to
        ``dumpjson`` with the cache path, and that element is returned.

        Note: list values in ``params`` are replaced in place by sorted
        copies, so the caller's dict is modified.
        """
        cache = {}
        if os.path.isfile(MySuperSearch.PATH):
            with open(MySuperSearch.PATH, 'r') as stream:
                cache = json.load(stream)

        # Sort list values before building the key (presumably so that the
        # same query always maps to the same cache entry).
        for name, value in params.items():
            if isinstance(value, list):
                params[name] = sorted(value)

        cache_key = get_params_query(params)
        if cache_key in cache:
            return cache[cache_key]

        collected = []
        query = Query(_SuperSearch.URL, params, MySuperSearch.handler,
                      collected)
        Connection(socorro.Socorro.API_URL, queries=query).wait()

        result = collected[0]
        cache[cache_key] = result
        dumpjson(MySuperSearch.PATH, cache)
        return result
Example #2
0
def get_nick(authors):
    """Return Bugzilla name information for the given authors.

    ``authors`` is queried in chunks of 20 through ``BugzillaUser``.  For
    each user handed to the handler, the nick is the first group of
    ``NICK_PAT`` searched in the real name ('' when nothing matches).

    Returns:
        list: one ``{'name', 'real_name', 'nick_name'}`` dict per author for
        which a user was received, in the order of ``authors``; the other
        authors are dropped.
    """
    found = {}

    def user_handler(user):
        real_name = user['real_name']
        match = NICK_PAT.search(real_name)
        nick_name = match.group(1) if match else ''
        login = user['name']
        found[login] = {
            'name': login,
            'real_name': real_name,
            'nick_name': nick_name,
        }

    # Materialise once: the input is walked twice (chunking, final lookup).
    authors = list(authors)
    pending = [
        BugzillaUser(user_names=chunk,
                     include_fields=['name', 'real_name'],
                     user_handler=user_handler)
        for chunk in Connection.chunks(authors, 20)
    ]
    for request in pending:
        request.wait()

    return [found[author] for author in authors if author in found]
def get_proto_small(product, signatures, search_date, channel):
    """Fetch the proto-signatures of signatures that have few crashes.

    The uuid has to be aggregated on the proto-signature, so several
    signatures are queried at once to go faster; this is possible because
    card(proto) <= card(crashes) for a given signature.

    One batch of SuperSearch queries (10 signatures per query) is sent and
    awaited per build id.  ``signatures`` is updated in place: at most
    ``threshold`` entries are kept in ``signatures[sgn]['protos'][bid]`` and
    the install cardinality (1 when 0 is reported) is stored in
    ``signatures[sgn]['installs'][bid]``.
    """
    logger.info('Get proto-signatures (small) for {}-{}: started.'.format(
        product, channel))

    def handler(bid, threshold, json, data):
        proto_facets = json['facets']['proto_signature']
        if not proto_facets:
            return
        for entry in proto_facets:
            sub = entry['facets']
            sgn = sub['signature'][0]['term']
            protos = data[sgn]['protos'][bid]
            if len(protos) >= threshold:
                continue
            protos.append({'proto': entry['term'],
                           'count': entry['count'],
                           'uuid': sub['uuid'][0]['term']})
        for entry in json['facets']['signature']:
            sgn = entry['term']
            installs = entry['facets']['cardinality_install_time']['value']
            data[sgn]['installs'][bid] = installs if installs != 0 else 1

    limit = config.get_limit_facets()
    threshold = config.get_threshold('protos', product, channel)
    base_params = {
        'product': product,
        'release_channel': utils.get_search_channel(channel),
        'date': search_date,
        'build_id': '',
        'signature': '',
        '_aggs.proto_signature': ['uuid', 'signature'],
        '_aggs.signature': '_cardinality.install_time',
        '_results_number': 0,
        '_facets': 'release_channel',
        '_facets_size': limit
    }

    for bid, all_signatures in utils.get_sgns_by_bids(signatures).items():
        bid_params = copy.deepcopy(base_params)
        bid_params['build_id'] = utils.get_buildid(bid)
        queries = []
        hdler = functools.partial(handler, bid, threshold)
        for sgns in Connection.chunks(all_signatures, 10):
            # Every query gets its own copy of the parameters.
            chunk_params = copy.deepcopy(bid_params)
            chunk_params['signature'] = ['=' + s for s in sgns]
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=chunk_params,
                      handler=hdler,
                      handlerdata=signatures))

        # Wait for this build id's batch before moving to the next one.
        socorro.SuperSearch(queries=queries).wait()

    logger.info('Get proto-signatures (small) for {}-{}: finished.'.format(
        product, channel))
Example #4
0
def get_stats_for_past_weeks(product, channel, start_date_by_channel, versions_by_channel, analysis, search_start_date, end_date, check_for_fx=True):
    """Count crashes per signature, channel and week before ``end_date``.

    Weeks are numbered backwards from the Monday returned by
    ``utils.get_monday_sunday`` for ``end_date``: index 0 is that week,
    1 the week before, and so on.

    Args:
        product: product name sent to SuperSearch.
        channel: iterable of channel names.
        start_date_by_channel: mapping channel -> first date to cover; it
            fixes how many week slots are created for the channel.
        versions_by_channel: mapping channel -> versions sent as 'version'.
        analysis: mapping signature -> info dict; ``info['firefox']`` is
            read when ``check_for_fx`` is true.
        search_start_date: start of the search range; when falsy the
            channel's own start date is used.
        end_date: end of the search range.
        check_for_fx: when true, signatures whose ``info['firefox']`` is
            falsy are skipped.

    Returns:
        dict: signature -> channel -> week index -> crash count.
    """
    ref_monday, _ = utils.get_monday_sunday(utils.get_date_ymd(end_date))

    def get_past_week(date):
        monday, _ = utils.get_monday_sunday(date)
        return (ref_monday - monday).days // 7

    # Zeroed week slots, one template per channel.
    default_trend_by_chan = {}
    for chan in channel:
        past_w = get_past_week(start_date_by_channel[chan])
        default_trend_by_chan[chan] = {i: 0 for i in range(past_w + 1)}

    trends = {}
    signatures_by_chan = {}
    for signature, info in analysis.items():
        if check_for_fx and not info['firefox']:
            continue
        data = {}
        trends[signature] = data
        for chan in channel:
            signatures_by_chan.setdefault(chan, []).append(signature)
            data[chan] = default_trend_by_chan[chan].copy()

    def handler_ss(chan, json, data):
        # NOTE(review): a week index outside the channel's template (e.g. a
        # search_start_date earlier than the channel start date) raises
        # KeyError here - confirm callers never request that.
        for facets in json['facets']['histogram_date']:
            week = get_past_week(utils.get_date_ymd(facets['term']))
            for signature in facets['facets']['signature']:
                count = signature['count']
                data[signature['term']][chan][week] += count

    queries = []
    for chan, signatures in signatures_by_chan.items():
        start = search_start_date or utils.get_date_str(
            start_date_by_channel[chan])
        search_date = socorro.SuperSearch.get_search_date(start, end_date)
        vers = versions_by_channel[chan]
        hdler = functools.partial(handler_ss, chan)
        for sgns in Connection.chunks(signatures, 10):
            queries.append(Query(socorro.SuperSearch.URL,
                                 {'signature': ['=' + s for s in sgns],
                                  'product': product,
                                  'version': vers,
                                  'release_channel': chan,
                                  'date': search_date,
                                  '_histogram.date': 'signature',
                                  '_results_number': 0},
                                 handler=hdler, handlerdata=trends))
    socorro.SuperSearch(queries=queries).wait()

    return trends
Example #5
0
def filter_nightly_buildids(buildids):
    """Drop, in place, the nightly builds that have too few crashes.

    For 'Firefox' and 'FennecAndroid', the builds in
    ``buildids[prod]['nightly']`` are queried in chunks of 128.  A build is
    flagged when its crash count reaches ``config.get_min_total(prod,
    'nightly')``.  The list is then replaced by the flagged builds, except
    that the unflagged builds at the very end of the list (after the last
    flagged one) are always kept.
    """
    def handler(threshold, json, data):
        build_facets = json['facets']['build_id']
        if not build_facets:
            return
        for facet in build_facets:
            if facet['count'] >= threshold:
                data[utils.get_build_date(facet['term'])] = True

    params = {
        'product': '',
        'build_id': '',
        'date': '',
        'release_channel': 'nightly',
        '_aggs.build_id': 'release_channel',
        '_results_number': 0,
        '_facets': 'release_channel',
        '_facets_size': 1000
    }

    data = {'Firefox': None, 'FennecAndroid': None}
    queries = []
    for prod in data:
        prod_params = copy.deepcopy(params)
        prod_params['product'] = prod
        threshold = config.get_min_total(prod, 'nightly')
        flagged = {}
        data[prod] = flagged
        hdler = functools.partial(handler, threshold)
        for bids in Connection.chunks(buildids[prod]['nightly'],
                                      chunk_size=128):
            chunk_params = copy.deepcopy(prod_params)
            # The search starts at the date of the chunk's first build.
            chunk_params['date'] = '>=' + bids[0][0].strftime('%Y-%m-%d')
            chunk_params['build_id'] = [utils.get_buildid(b[0]) for b in bids]
            for b in bids:
                flagged[b[0]] = False

            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=chunk_params,
                      handler=hdler,
                      handlerdata=flagged))

    socorro.SuperSearch(queries=queries).wait()

    for prod, info in data.items():
        nightly = buildids[prod]['nightly']
        keep = [info[bid[0]] for bid in nightly]
        # Force-keep the trailing run of unflagged builds.
        idx = len(keep) - 1
        while idx >= 0 and not keep[idx]:
            keep[idx] = True
            idx -= 1
        buildids[prod]['nightly'] = [
            bid for bid, wanted in zip(nightly, keep) if wanted
        ]
Example #6
0
def get_uuids_fennec(signatures, search_date, channel):
    """Get the uuids for Fennec java crashes.

    Signatures are grouped by build id and queried 10 at a time; all the
    queries are sent in one batch.  ``signatures`` is updated in place: for
    each signature facet, an entry with an empty proto, the facet count and
    the first uuid is added to ``signatures[sgn]["protos"][bid]`` when that
    list is still empty.
    """
    logger.info("Get uuids for Fennec-{}: started.".format(channel))

    def handler(json, data):
        if json["errors"]:
            return
        sgn_facets = json["facets"]["signature"]
        if not sgn_facets:
            return
        # The build id comes from the first build_id facet of the response.
        bid = utils.get_build_date(json["facets"]["build_id"][0]["term"])
        for entry in sgn_facets:
            sgn = entry["term"]
            count = entry["count"]
            uuid = entry["facets"]["uuid"][0]["term"]
            protos = data[sgn]["protos"][bid]
            if protos:
                continue
            protos.append({"proto": "", "count": count, "uuid": uuid})

    base_params = {
        "product": "Fennec",
        "release_channel": utils.get_search_channel(channel),
        "date": search_date,
        "build_id": "",
        "signature": "",
        "_aggs.signature": "uuid",
        "_results_number": 0,
        "_facets": "build_id",
        "_facets_size": 100,
    }

    def build_queries():
        """Yield one Query per (build id, chunk of 10 signatures)."""
        for bid, all_signatures in utils.get_sgns_by_bids(signatures).items():
            bid_params = copy.deepcopy(base_params)
            bid_params["build_id"] = utils.get_buildid(bid)
            for sgns in Connection.chunks(all_signatures, 10):
                chunk_params = copy.deepcopy(bid_params)
                chunk_params["signature"] = ["=" + s for s in sgns]
                yield Query(
                    socorro.SuperSearch.URL,
                    params=chunk_params,
                    handler=handler,
                    handlerdata=signatures,
                )

    socorro.SuperSearch(queries=list(build_queries())).wait()

    logger.info("Get uuids for Fennec-{}: finished.".format(channel))
def get_uuids_fennec(signatures, search_date, channel):
    """Get the uuids for FennecAndroid java crashes.

    Signatures are grouped by build id and queried 10 at a time; all the
    queries are sent in one batch.  ``signatures`` is updated in place: for
    each signature facet, an entry with an empty proto, the facet count and
    the first uuid is added to ``signatures[sgn]['protos'][bid]`` when that
    list is still empty.
    """
    logger.info('Get uuids for FennecAndroid-{}: started.'.format(channel))

    def handler(json, data):
        facets = json['facets']
        if json['errors'] or not facets['signature']:
            return
        # The build id comes from the first build_id facet of the response.
        bid = utils.get_build_date(facets['build_id'][0]['term'])
        for item in facets['signature']:
            sgn, count = item['term'], item['count']
            uuid = item['facets']['uuid'][0]['term']
            protos = data[sgn]['protos'][bid]
            if not protos:
                protos.append({'proto': '', 'count': count, 'uuid': uuid})

    base_params = {
        'product': 'FennecAndroid',
        'release_channel': utils.get_search_channel(channel),
        'date': search_date,
        'build_id': '',
        'signature': '',
        '_aggs.signature': 'uuid',
        '_results_number': 0,
        '_facets': 'build_id',
        '_facets_size': 100
    }

    queries = []
    for bid, all_signatures in utils.get_sgns_by_bids(signatures).items():
        bid_params = copy.deepcopy(base_params)
        bid_params['build_id'] = utils.get_buildid(bid)

        for sgns in Connection.chunks(all_signatures, 10):
            # Every query gets its own copy of the parameters.
            chunk_params = copy.deepcopy(bid_params)
            chunk_params['signature'] = ['=' + s for s in sgns]
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=chunk_params,
                      handler=handler,
                      handlerdata=signatures))
    socorro.SuperSearch(queries=queries).wait()

    logger.info('Get uuids for FennecAndroid-{}: finished.'.format(channel))
Example #8
0
    def get_data(params, channel):
        """Return the revision result for ``params``, going through a JSON cache.

        The cache is loaded from the JSON file at ``MyRevision.PATH`` (an
        empty cache is used when the file does not exist) and is keyed by
        ``get_params_query(params)`` only - ``channel`` is not part of the
        key.  On a miss, a query on ``_Revision.get_url(channel)`` is run
        through a ``Connection`` on ``Mercurial.HG_URL``; the first element
        of the handler-data list is stored under the key, the cache is passed
        to ``dumpjson`` with the cache path, and that element is returned.
        """
        cache = {}
        if os.path.isfile(MyRevision.PATH):
            with open(MyRevision.PATH, 'r') as stream:
                cache = json.load(stream)

        cache_key = get_params_query(params)
        if cache_key in cache:
            return cache[cache_key]

        collected = []
        query = Query(_Revision.get_url(channel), params, MyRevision.handler,
                      collected)
        Connection(Mercurial.HG_URL, queries=query).wait()

        result = collected[0]
        cache[cache_key] = result
        dumpjson(MyRevision.PATH, cache)
        return result
Example #9
0
def get_sgns_info(sgns_by_chan,
                  product='Firefox',
                  date='today',
                  query=None,
                  versions=None):
    """Get crash counts, platforms and startup split for signatures.

    Args:
        sgns_by_chan: mapping channel -> list of signatures to look up.
        product: product name sent to SuperSearch.
        date: passed to ``utils.get_date`` to build the 'date' range.
        query: optional extra SuperSearch parameters merged over the
            defaults (None means no extra parameter).
        versions: optional mapping channel -> versions; when given, the
            channel's versions are sent as 'version'.

    Returns:
        dict: channel -> signature -> {'count': int, 'platforms':
        defaultdict(platform_pretty_version -> count), 'startup':
        {True: count, False: count}}.  Signatures absent from the responses
        have no entry.
    """
    today = utils.get_date(date)
    # NOTE(review): the -1 offset is presumably "one day after `date`" given
    # the '<' bound below - confirm against utils.get_date.
    tomorrow = utils.get_date(date, -1)
    data = {chan: {} for chan in sgns_by_chan}

    def handler(json, data):
        if json['errors'] or not json['facets']['signature']:
            return

        for facets in json['facets']['signature']:
            sgn = facets['term']
            count = facets['count']
            platforms = defaultdict(int)
            startup = {True: 0, False: 0}
            data[sgn] = {
                'count': count,
                'platforms': platforms,
                'startup': startup
            }
            facets = facets['facets']
            for ppv in facets['platform_pretty_version']:
                platforms[ppv['term']] += ppv['count']
            for sc in facets['startup_crash']:
                # Only the term 'T' counts as a startup crash.
                startup[sc['term'] == 'T'] += sc['count']

    base = {
        'product': product,
        'date': ['>=' + today, '<' + tomorrow],
        'release_channel': '',
        'version': '',
        'signature': '',
        '_aggs.signature': ['platform_pretty_version', 'startup_crash'],
        '_results_number': 0,
        '_facets_size': 100
    }

    # `query` defaults to None rather than a shared mutable dict.
    if query:
        base.update(query)

    searches = []
    for chan, signatures in sgns_by_chan.items():
        params = copy.copy(base)
        params['release_channel'] = chan
        if versions:
            params['version'] = versions[chan]
        for sgns in Connection.chunks(signatures, 10):
            p = copy.copy(params)
            p['signature'] = ['=' + s for s in sgns]
            searches.append(
                socorro.SuperSearch(params=p,
                                    handler=handler,
                                    handlerdata=data[chan]))

    for s in searches:
        s.wait()

    return data
def get_signatures(limit, product, versions, channel, search_date, signatures,
                   bug_ids, verbose):
    """Collect signatures with the channels and platforms they affect.

    Args:
        limit: maximum number of signatures kept per SuperSearch response;
            when <= 0, the total reported by Socorro for the search is used.
        product: product name sent to SuperSearch.
        versions: mapping channel -> list of versions.
        channel: iterable of channel names.
        search_date: value of the SuperSearch 'date' parameter.
        signatures: signatures to look up (may be empty or None).
        bug_ids: bug ids whose signatures, as returned by
            ``Bugzilla.get_signatures``, are added to ``signatures``.
        verbose: forwarded to ``__warn``.

    Returns:
        dict: signature -> {'affected_channels': [(channel, count), ...],
        'platforms': [platform, ...], 'selected_bug': None, 'jsbugmon': 0,
        'bugs': None}.  Signatures listed by ``get_ignored_signatures()``
        are left out.
    """
    # Flatten the per-channel versions first: every query, including the
    # counting one below, must send the same 'version' filter.
    all_versions = []
    for c in channel:
        all_versions += versions[c]

    if limit <= 0:
        count = []
        socorro.SuperSearch(
            params={
                'product': product,
                # Was `versions` (the per-channel mapping), unlike the other
                # queries of this function.
                'version': all_versions,
                'date': search_date,
                'release_channel': channel,
                '_facets_size': 1,
                '_results_number': 0
            },
            handler=lambda json: count.append(json['total'])).wait()
        limit = count[0]

    __warn('Maximum signatures to collect: %d' % limit, verbose)

    __signatures = {}

    known_platforms = {'Windows NT', 'Windows', 'Mac OS X', 'Linux'}
    known_wtf_platforms = {'0x00000000', ''}

    ignored_signatures = get_ignored_signatures()

    def handler_ss(json, data):
        n = 0
        for bucket in json['facets']['signature']:
            signature = bucket['term']
            if signature in ignored_signatures:
                continue
            n += 1
            if n > limit:
                break

            affected = []
            platforms = []
            data[signature] = {
                'affected_channels': affected,
                'platforms': platforms,
                'selected_bug': None,
                'jsbugmon': 0,
                'bugs': None
            }
            facets = bucket['facets']
            for c in facets['release_channel']:
                affected.append((c['term'], c['count']))
            for p in facets['platform']:
                platform = p['term']
                if platform and platform in known_platforms:
                    # 'Windows NT' is reported as 'Windows'.
                    if platform == 'Windows NT':
                        platform = 'Windows'
                    platforms.append(platform)
                elif platform not in known_wtf_platforms:
                    # NOTE(review): called without `verbose`, unlike the call
                    # above - confirm the default of __warn.
                    __warn('Unknown os: %s' % platform)

    if signatures or bug_ids:
        if bug_ids:
            # `signatures` may be None when only bug ids are given.
            set_sgns = set(signatures or ())
            for ss in Bugzilla.get_signatures(bug_ids).values():
                if ss:
                    set_sgns.update(ss)
            signatures = list(set_sgns)
        queries = []
        for sgns in Connection.chunks(signatures, 10):
            queries.append(
                Query(socorro.SuperSearch.URL, {
                    'signature': ['=' + s for s in sgns],
                    'product': product,
                    'version': all_versions,
                    'release_channel': channel,
                    'date': search_date,
                    '_aggs.signature': ['release_channel', 'platform'],
                    '_facets_size': max(limit, 100),
                    '_results_number': 0
                },
                      handler=handler_ss,
                      handlerdata=__signatures))
        socorro.SuperSearch(queries=queries).wait()
    else:
        socorro.SuperSearch(params={
            'product': product,
            'version': all_versions,
            'release_channel': channel,
            'date': search_date,
            '_aggs.signature': ['release_channel', 'platform'],
            '_facets_size': max(limit, 100),
            '_results_number': 0
        },
                            handler=handler_ss,
                            handlerdata=__signatures,
                            timeout=300).wait()

    return __signatures
Example #11
0
def get(channel,
        date,
        product='Firefox',
        duration=11,
        tc_limit=50,
        crash_type='all',
        startup=False):
    """Get crashes info

    Progress messages are written to stdout while the Socorro and Bugzilla
    queries are running.

    Args:
        channel (str): the channel (lower-cased before use)
        date (str): the final date
        product (Optional[str]): the product
        duration (Optional[int]): the duration to retrieve the data; the
            search range is the ``duration`` days ending at ``date``
        tc_limit (Optional[int]): the number of topcrashes to load
        crash_type (Optional[str]): 'all' (default) or 'browser' or 'content' or 'plugin'
            (wrapped in a list when it is a string, but not used otherwise
            in this function)
        startup (Optional[bool]): when true, 'startup_crash': True is added
            to the top-signatures query

    Returns:
        dict: contains all the info relative to the crashes, with keys
        'start_date', 'end_date', 'versions' and 'signatures'; each
        signature maps to 'tc_rank', 'crash_count', 'estimated_user_count',
        'startup_percent' (startup count / crash count) and 'bugs'
    """
    channel = channel.lower()
    # `v` is not defined in this function; presumably a module-level mapping
    # channel -> major version - TODO confirm.
    version = v[channel]
    sys.stdout.write('Getting version information from Socorro...')
    sys.stdout.flush()
    versions = versions_util.get_channel_versions(channel, product)
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    if crash_type and isinstance(crash_type, six.string_types):
        crash_type = [crash_type]

    _date = utils.get_date_ymd(date)
    start_date = utils.get_date_str(_date - timedelta(duration - 1))
    end_date = utils.get_date_str(_date)

    # signature -> [count, windows, mac, linux, startup, installs]
    signatures = {}

    def signature_handler(json):
        for signature in json['facets']['signature']:
            signatures[signature['term']] = [signature['count'], 0, 0, 0, 0, 0]

            for platform in signature['facets']['platform']:
                if platform['term'] == 'Linux':
                    signatures[signature['term']][3] = platform['count']
                elif platform['term'] == 'Windows NT':
                    signatures[signature['term']][1] = platform['count']
                elif platform['term'] == 'Mac OS X':
                    signatures[signature['term']][2] = platform['count']

            for startup_crash in signature['facets']['startup_crash']:
                if startup_crash['term'] in ['1', 'T']:
                    signatures[signature['term']][4] += startup_crash['count']

            signatures[signature['term']][5] = signature['facets'][
                'cardinality_install_time']['value']

    params = {
        'product':
        product,
        'version':
        versions,
        'date':
        socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel':
        channel,
        '_aggs.signature':
        ['platform', '_cardinality.install_time', 'startup_crash'],
        '_results_number':
        0,
        '_facets_size':
        tc_limit,
    }

    if startup:
        params['startup_crash'] = True

    sys.stdout.write('Getting top signatures from Socorro...')
    sys.stdout.flush()
    socorro.SuperSearch(params=params, handler=signature_handler).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # Bug fields to fetch: the tracking flag of this channel's version and
    # the status flags from this version up to the nightly one.
    bug_flags = [
        'resolution', 'id', 'last_change_time',
        'cf_tracking_firefox' + str(version)
    ]
    for i in range(int(version), int(v['nightly']) + 1):
        bug_flags.append('cf_status_firefox' + str(i))

    # TODO: too many requests... should be improved with chunks
    bugs = {}
    # TODO: Use regexp, when the Bugzilla bug that prevents them from working will be fixed.
    base = {
        'j_top': 'OR',
        'o1': 'substring',
        'f1': 'cf_crash_signature',
        'v1': None,
        'o2': 'substring',
        'f2': 'cf_crash_signature',
        'v2': None,
        'o3': 'substring',
        'f3': 'cf_crash_signature',
        'v3': None,
        'o4': 'substring',
        'f4': 'cf_crash_signature',
        'v4': None,
        'include_fields': bug_flags
    }

    # One Bugzilla query per signature, matching the four spacing variants
    # of '[@ signature ]'.
    queries = []
    for sgn in signatures.keys():
        cparams = base.copy()
        cparams['v1'] = '[@' + sgn + ']'
        cparams['v2'] = '[@ ' + sgn + ' ]'
        cparams['v3'] = '[@ ' + sgn + ']'
        cparams['v4'] = '[@' + sgn + ' ]'
        bugs[sgn] = []
        queries.append(
            Query(Bugzilla.API_URL, cparams, __bug_handler, bugs[sgn]))
    # Not waited for here: the results are collected further down, after the
    # Socorro trend queries.
    res_bugs = Bugzilla(queries=queries)

    # we have stats by signature in self.signatures
    # for each signature get the number of crashes on the last X days
    # so get the signature trend
    trends = {}
    default_trend = {}
    for i in range(duration):
        default_trend[_date - timedelta(i)] = 0

    base = {
        'product': product,
        'version': versions,
        'signature': None,
        'date': socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel': channel,
        '_results_number': 0,
        '_histogram.date': ['signature'],
        '_histogram_interval': 1
    }

    # Signatures are queried by chunks of 10; a chunk is split further once
    # the accumulated length of its signatures reaches 1000 characters.
    queries = []
    for sgns in Connection.chunks(
            list(map(lambda sgn: '=' + sgn, signatures.keys())), 10):
        sgn_group = []
        for sgn in sgns:
            if sum(len(s) for s in sgn_group) >= 1000:
                cparams = base.copy()
                cparams['signature'] = sgn_group
                queries.append(
                    Query(socorro.SuperSearch.URL, cparams,
                          functools.partial(__trend_handler, default_trend),
                          trends))
                sgn_group = []

            sgn_group.append(sgn)

        if len(sgn_group) > 0:
            cparams = base.copy()
            cparams['signature'] = sgn_group
            queries.append(
                Query(socorro.SuperSearch.URL, cparams,
                      functools.partial(__trend_handler, default_trend),
                      trends))

    sys.stdout.write('Getting trends for top signatures from Socorro...')
    sys.stdout.flush()
    socorro.SuperSearch(queries=queries).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # From here on signatures[sgn] is a pair (stats list, trend values with
    # the most recent key first).
    # NOTE(review): a signature missing from `trends` keeps its list form,
    # which would break the x[1][0][0] / stats[0][...] indexing below -
    # confirm __trend_handler fills every queried signature.
    for sgn, trend in trends.items():
        signatures[sgn] = (signatures[sgn], [
            trend[key] for key in sorted(trend.keys(), reverse=True)
        ])

    _signatures = {}
    # order self.signatures by crash count
    sorted_signatures = sorted(signatures.items(),
                               key=lambda x: x[1][0][0],
                               reverse=True)
    i = 1
    for s in sorted_signatures:
        _signatures[s[0]] = i  # top crash rank
        i += 1

    sys.stdout.write(
        'Getting bugs linked to the top signatures from Bugzilla...')
    sys.stdout.flush()
    # Poll the Bugzilla results started earlier, printing a dot each time a
    # 2 second wait times out.
    while True:
        try:
            for r in res_bugs.results:
                r.result(timeout=2)
            break
        except TimeoutError:
            sys.stdout.write('.')
            sys.stdout.flush()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # TODO: In the first query to get the bugs, also get dupe_of and avoid the first query
    #       in follow_dup (so modify follow_dup to accept both a bug ID or a bug object).
    queries = []
    for sgn in signatures.keys():
        duplicate_ids = [
            bug['id'] for bug in bugs[sgn] if bug['resolution'] == 'DUPLICATE'
        ]

        # Remove bugs resolved as DUPLICATE from the list of bugs associated to the signature.
        bugs[sgn] = [
            bug for bug in bugs[sgn] if bug['id'] not in duplicate_ids
        ]

        # Find duplicates for bugs resolved as DUPLICATE.
        duplicates = {
            k: v
            for k, v in Bugzilla.follow_dup(duplicate_ids).items()
            if v is not None
        }
        # Keep only the targets that are not already attached to the signature.
        duplicate_targets = [
            bug_id for bug_id in duplicates.values()
            if int(bug_id) not in [bug['id'] for bug in bugs[sgn]]
        ]
        if len(duplicate_targets) == 0:
            continue

        # Get info about bugs that the DUPLICATE bugs have been duped to.
        params = {
            'id': ','.join(duplicate_targets),
            'include_fields': bug_flags,
        }
        queries.append(
            Query(Bugzilla.API_URL, params, __bug_handler, bugs[sgn]))
    sys.stdout.write(
        'Resolving duplicate bugs to the bugs they\'ve been duplicated to...')
    sys.stdout.flush()
    Bugzilla(queries=queries).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    for sgn, stats in signatures.items():
        # stats is 2-uple: ([count, win_count, mac_count, linux_count, startup_count, install_count], trend)
        startup_percent = float(stats[0][4]) / float(stats[0][0])
        _signatures[sgn] = {
            'tc_rank': _signatures[sgn],
            'crash_count': stats[0][0],
            'estimated_user_count': stats[0][5],
            'startup_percent': startup_percent,
            'bugs': bugs[sgn]
        }

    return {
        'start_date': start_date,
        'end_date': end_date,
        'versions': list(versions),
        'signatures': _signatures,
    }
Example #12
0
def get_signatures(limit, product, versions, channel, search_date, signatures, bug_ids, verbose):
    """Collect signatures with the channels and platforms they affect.

    Args:
        limit: maximum number of signatures kept per SuperSearch response;
            when <= 0, the total reported by Socorro for the search is used.
        product: product name sent to SuperSearch.
        versions: mapping channel -> list of versions.
        channel: iterable of channel names.
        search_date: value of the SuperSearch 'date' parameter.
        signatures: signatures to look up (may be empty or None).
        bug_ids: bug ids whose signatures, as returned by
            ``Bugzilla.get_signatures``, are added to ``signatures``.
        verbose: forwarded to ``__warn``.

    Returns:
        dict: signature -> {'affected_channels': [(channel, count), ...],
        'platforms': [platform, ...], 'selected_bug': None, 'jsbugmon': 0,
        'bugs': None}.  Signatures listed by ``get_ignored_signatures()``
        are left out.
    """
    # Flatten the per-channel versions first: every query, including the
    # counting one below, must send the same 'version' filter.
    all_versions = []
    for c in channel:
        all_versions += versions[c]

    if limit <= 0:
        count = []
        # 'version' was `versions` (the per-channel mapping), unlike the
        # other queries of this function.
        socorro.SuperSearch(params={'product': product,
                                    'version': all_versions,
                                    'date': search_date,
                                    'release_channel': channel,
                                    '_facets_size': 1,
                                    '_results_number': 0},
                            handler=lambda json: count.append(json['total'])).wait()
        limit = count[0]

    __warn('Maximum signatures to collect: %d' % limit, verbose)

    __signatures = {}

    known_platforms = {'Windows NT', 'Windows', 'Mac OS X', 'Linux'}
    known_wtf_platforms = {'0x00000000', ''}

    ignored_signatures = get_ignored_signatures()

    def handler_ss(json, data):
        n = 0
        for bucket in json['facets']['signature']:
            signature = bucket['term']
            if signature in ignored_signatures:
                continue
            n += 1
            if n > limit:
                break

            affected = []
            platforms = []
            data[signature] = {'affected_channels': affected,
                               'platforms': platforms,
                               'selected_bug': None,
                               'jsbugmon': 0,
                               'bugs': None}
            facets = bucket['facets']
            for c in facets['release_channel']:
                affected.append((c['term'], c['count']))
            for p in facets['platform']:
                platform = p['term']
                if platform and platform in known_platforms:
                    # 'Windows NT' is reported as 'Windows'.
                    if platform == 'Windows NT':
                        platform = 'Windows'
                    platforms.append(platform)
                elif platform not in known_wtf_platforms:
                    # NOTE(review): called without `verbose`, unlike the call
                    # above - confirm the default of __warn.
                    __warn('Unknown os: %s' % platform)

    if signatures or bug_ids:
        if bug_ids:
            # `signatures` may be None when only bug ids are given.
            set_sgns = set(signatures or ())
            for ss in Bugzilla.get_signatures(bug_ids).values():
                if ss:
                    set_sgns.update(ss)
            signatures = list(set_sgns)
        queries = []
        for sgns in Connection.chunks(signatures, 10):
            queries.append(Query(socorro.SuperSearch.URL,
                                 {'signature': ['=' + s for s in sgns],
                                  'product': product,
                                  'version': all_versions,
                                  'release_channel': channel,
                                  'date': search_date,
                                  '_aggs.signature': ['release_channel', 'platform'],
                                  '_facets_size': max(limit, 100),
                                  '_results_number': 0},
                                 handler=handler_ss, handlerdata=__signatures))
        socorro.SuperSearch(queries=queries).wait()
    else:
        socorro.SuperSearch(params={'product': product,
                                    'version': all_versions,
                                    'release_channel': channel,
                                    'date': search_date,
                                    '_aggs.signature': ['release_channel', 'platform'],
                                    '_facets_size': max(limit, 100),
                                    '_results_number': 0},
                            handler=handler_ss, handlerdata=__signatures, timeout=300).wait()

    return __signatures
Example #13
0
def get_proto_small(product, signatures, search_date, channel):
    """Fetch the proto-signatures of signatures that have few crashes.

    The uuid has to be aggregated on the proto-signature, so several
    signatures are queried at once to go faster; this is possible because
    card(proto) <= card(crashes) for a given signature.

    One batch of SuperSearch queries (5 signatures per query) is sent and
    awaited per build id.  ``signatures`` is updated in place: at most
    ``threshold`` entries are kept in ``signatures[sgn]["protos"][bid]`` and
    the install cardinality (1 when 0 is reported) is stored in
    ``signatures[sgn]["installs"][bid]``.
    """
    logger.info("Get proto-signatures (small) for {}-{}: started.".format(
        product, channel))

    def handler(bid, threshold, json, data):
        all_facets = json["facets"]
        if not all_facets["proto_signature"]:
            return
        for item in all_facets["proto_signature"]:
            nested = item["facets"]
            sgn = nested["signature"][0]["term"]
            protos = data[sgn]["protos"][bid]
            if len(protos) < threshold:
                protos.append({
                    "proto": item["term"],
                    "count": item["count"],
                    "uuid": nested["uuid"][0]["term"],
                })
        for item in all_facets["signature"]:
            sgn = item["term"]
            installs = item["facets"]["cardinality_install_time"]["value"]
            data[sgn]["installs"][bid] = 1 if installs == 0 else installs

    def with_signatures(bid_params, sgns):
        """Return a private copy of the parameters for one signature chunk."""
        chunk_params = copy.deepcopy(bid_params)
        chunk_params["signature"] = ["=" + s for s in sgns]
        return chunk_params

    limit = config.get_limit_facets()
    threshold = config.get_threshold("protos", product, channel)
    base_params = {
        "product": product,
        "release_channel": utils.get_search_channel(channel),
        "date": search_date,
        "build_id": "",
        "signature": "",
        "_aggs.proto_signature": ["uuid", "signature"],
        "_aggs.signature": "_cardinality.install_time",
        "_results_number": 0,
        "_facets": "release_channel",
        "_facets_size": limit,
    }

    for bid, all_signatures in utils.get_sgns_by_bids(signatures).items():
        bid_params = copy.deepcopy(base_params)
        bid_params["build_id"] = utils.get_buildid(bid)
        hdler = functools.partial(handler, bid, threshold)
        queries = [
            Query(
                socorro.SuperSearch.URL,
                params=with_signatures(bid_params, sgns),
                handler=hdler,
                handlerdata=signatures,
            )
            for sgns in Connection.chunks(all_signatures, 5)
        ]

        # Wait for this build id's batch before moving to the next one.
        socorro.SuperSearch(queries=queries).wait()

    logger.info("Get proto-signatures (small) for {}-{}: finished.".format(
        product, channel))
Example #14
0
def get_uuids(channel,
              product='Firefox',
              date='today',
              limit=10000,
              max_days=3,
              threshold=5):
    """Return one crash uuid per proto-signature of the new signatures.

    A first SuperSearch query builds, for every signature, a per-day crash
    count over the ``max_days + 1`` days starting at ``date - (max_days + 1)``.
    Those trends are handed to ``get_new_signatures`` (defined elsewhere in
    this file) together with ``threshold``; the signatures it returns are
    then queried by chunks of 5 to aggregate their proto-signatures.

    Args:
        channel: value sent as ``release_channel``.
        product: value sent as ``product``.
        date: end of the search window, parsed by ``utils.get_date_ymd``.
        limit: ``_facets_size`` of the first (histogram) query.
        max_days: the window starts ``max_days + 1`` days before ``date``.
        threshold: forwarded to ``get_new_signatures``.

    Returns:
        ``(protos, search_date)`` where ``protos`` maps each proto-signature
        to ``{'uuid', 'count', 'signature'}``, or ``({}, '')`` when
        ``get_new_signatures`` returns nothing.
    """
    last_day = utils.get_date_ymd(date)
    first_day = last_day - timedelta(days=max_days + 1)
    search_date = socorro.SuperSearch.get_search_date(first_day, last_day)

    # Zero-filled template: every signature starts with a 0 for each day.
    empty_trend = {}
    for offset in range(max_days + 1):
        empty_trend[first_day + timedelta(days=offset)] = 0
    trends = defaultdict(lambda: copy.deepcopy(empty_trend))

    def collect_trends(json, data):
        # Responses carrying errors are ignored altogether.
        if json['errors']:
            return
        for day_bucket in json['facets']['histogram_date']:
            day = utils.get_date_ymd(day_bucket['term'])
            for entry in day_bucket['facets']['signature']:
                count = entry['count']
                sgn = entry['term']
                data[sgn][day] += count

    histogram_params = {
        'product': product,
        'date': search_date,
        'release_channel': channel,
        '_histogram.date': 'signature',
        '_facets_size': limit,
        '_results_number': 1
    }
    socorro.SuperSearch(params=histogram_params,
                        handler=collect_trends,
                        handlerdata=trends).wait()

    fresh_signatures = get_new_signatures(trends, threshold=threshold)
    if not fresh_signatures:
        return {}, ''

    protos = {}

    def collect_protos(json, data):
        if json['errors']:
            return
        for bucket in json['facets']['proto_signature']:
            proto = bucket['term']
            count = bucket['count']
            nested = bucket['facets']
            signature = nested['signature'][0]['term']
            first_uuid = nested['uuid'][0]['term']
            data[proto] = {
                'uuid': first_uuid,
                'count': count,
                'signature': signature
            }

    # One query per chunk of 5 signatures; all of them fill `protos`.
    queries = [
        Query(socorro.SuperSearch.URL, {
            'product': product,
            'date': search_date,
            'signature': ['=' + s for s in chunk],
            'release_channel': channel,
            '_aggs.proto_signature': ['uuid', 'signature'],
            '_facets_size': 1000,
            '_results_number': 0
        },
              handler=collect_protos,
              handlerdata=protos)
        for chunk in Connection.chunks(fresh_signatures, 5)
    ]

    socorro.SuperSearch(queries=queries).wait()
    return protos, search_date
Example #15
0
def get_uuids_for_spiking_signatures(channel, cache=None, product='Firefox', date='today', limit=10000, max_days=3, threshold=5):
    """Collect proto-signatures and a crash uuid for signatures that just spiked.

    Crash counts are first aggregated per build id and signature, then
    summed per US/Pacific calendar day of the build. A signature is spiking
    when its total is 0 on every day of the window except the last one and
    at least ``threshold`` on the last day. The spiking signatures are then
    queried (by chunks of 5, builds of the last day only) to aggregate their
    proto-signatures.

    Args:
        channel: value sent as ``release_channel``.
        cache: optional mapping; when truthy, a uuid also present in
            ``cache['uuids']`` is preferred over the first uuid returned.
        product: value sent as ``product``.
        date: exclusive end of the build window, parsed by
            ``utils.get_date_ymd``.
        limit: ``_facets_size`` of the per-build-id query.
        max_days: the window covers the ``max_days + 1`` days before ``date``.
        threshold: minimal total on the last day for a signature to spike.

    Returns:
        ``None`` when no signature is spiking; otherwise a
        ``defaultdict(list)`` mapping each signature to a list of
        ``{'proto', 'uuid', 'count'}`` dicts.
    """
    psttz = pytz.timezone('US/Pacific')
    end_date = utils.get_date_ymd(date)  # 2016-10-18 UTC
    end_day = datetime(end_date.year, end_date.month, end_date.day)
    start_day = end_day - timedelta(days=max_days + 1)

    # Day arithmetic is done on naive datetimes and each result is localized
    # afterwards: adding a timedelta to a pytz-localized datetime keeps the
    # old UTC offset, so across a DST change the day keys would not match the
    # ones rebuilt with psttz.localize() below (KeyError).
    end_date_moz = psttz.localize(end_day)  # 2016-10-18 PST
    start_date_moz = psttz.localize(start_day)  # 2016-10-14 PST (max_days == 3)
    window = [psttz.localize(start_day + timedelta(days=i)) for i in range(max_days + 1)]  # from 2016-10-14 to 2016-10-17 PST

    end_buildid = utils.get_buildid_from_date(end_date_moz)  # < 20161018000000
    start_buildid = utils.get_buildid_from_date(start_date_moz)  # >= 20161014000000
    search_buildid = ['>=' + start_buildid, '<' + end_buildid]
    start_date = utils.as_utc(start_date_moz)  # 2016-10-14 07:00:00 UTC
    search_date = '>=' + utils.get_date_str(start_date)
    counts = defaultdict(lambda: defaultdict(int))

    def count_handler(json, data):
        # data: signature -> build datetime (US/Pacific) -> crash count
        if not json['errors']:
            for facets in json['facets']['build_id']:
                build_date = utils.get_date_from_buildid(facets['term']).astimezone(psttz)
                for s in facets['facets']['signature']:
                    sgn = s['term']
                    count = s['count']
                    data[sgn][build_date] += count

    socorro.SuperSearch(params={'product': product,
                                'date': search_date,
                                'build_id': search_buildid,
                                'release_channel': channel,
                                '_aggs.build_id': 'signature',
                                '_facets_size': limit,
                                '_results_number': 0},
                        handler=count_handler, handlerdata=counts).wait()

    base = {day: {'buildids': {}, 'total': 0} for day in window}

    # Regroup the per-build counts by calendar day of the build.
    per_day = {}
    for sgn, info in counts.items():
        d = copy.deepcopy(base)
        per_day[sgn] = d
        for bid, count in info.items():
            day = psttz.localize(datetime(bid.year, bid.month, bid.day))
            d[day]['buildids'][bid] = count
            d[day]['total'] += count

    spiking_signatures = []
    for sgn, info in per_day.items():
        totals = [info[day]['total'] for day in sorted(info)]
        if all(t == 0 for t in totals[:-1]) and totals[-1] >= threshold:
            spiking_signatures.append(sgn)

    if not spiking_signatures:
        return None

    # sort the signatures to be sure to always have the same order for the test
    spiking_signatures = sorted(spiking_signatures)

    # Only the builds of the last day of the window are queried now.
    start_buildid = utils.get_buildid_from_date(psttz.localize(end_day - timedelta(days=1)))
    search_buildid = ['>=' + start_buildid, '<' + end_buildid]
    data = defaultdict(list)

    def proto_handler(json, data):
        if not json['errors']:
            for facets in json['facets']['proto_signature']:
                proto = facets['term']
                count = facets['count']
                nested = facets['facets']
                sgn = nested['signature'][0]['term']
                first_uuid = nested['uuid'][0]['term']
                uuids = {i['term'] for i in nested['uuid']}
                if cache:
                    # Prefer a uuid that is already known to the cache.
                    common = uuids.intersection(cache['uuids'])
                    uuid = common.pop() if common else first_uuid
                else:
                    uuid = first_uuid
                data[sgn].append({'proto': proto, 'uuid': uuid, 'count': count})

    queries = []
    for sgns in Connection.chunks(spiking_signatures, 5):
        queries.append(Query(socorro.SuperSearch.URL,
                             {'product': product,
                              'date': search_date,
                              'build_id': search_buildid,
                              'signature': ['=' + s for s in sgns],
                              'release_channel': channel,
                              '_aggs.proto_signature': ['uuid', 'signature'],
                              '_facets_size': 10000,
                              '_results_number': 0},
                             handler=proto_handler, handlerdata=data))

    socorro.SuperSearch(queries=queries).wait()

    return data
def get_uuids_for_spiking_signatures(channel,
                                     cache=None,
                                     product='Firefox',
                                     date='today',
                                     limit=10000,
                                     max_days=3,
                                     threshold=5):
    """Collect proto-signatures and a crash uuid for signatures that just spiked.

    Crash counts are first aggregated per build id and signature, then
    summed per US/Pacific calendar day of the build. A signature is spiking
    when its total is 0 on every day of the window except the last one and
    at least ``threshold`` on the last day. The spiking signatures are then
    queried (by chunks of 5, builds of the last day only) to aggregate their
    proto-signatures.

    Args:
        channel: value sent as ``release_channel``.
        cache: optional mapping; when truthy, a uuid also present in
            ``cache['uuids']`` is preferred over the first uuid returned.
        product: value sent as ``product``.
        date: exclusive end of the build window, parsed by
            ``utils.get_date_ymd``.
        limit: ``_facets_size`` of the per-build-id query.
        max_days: the window covers the ``max_days + 1`` days before ``date``.
        threshold: minimal total on the last day for a signature to spike.

    Returns:
        ``None`` when no signature is spiking; otherwise a
        ``defaultdict(list)`` mapping each signature to a list of
        ``{'proto', 'uuid', 'count'}`` dicts.
    """
    psttz = pytz.timezone('US/Pacific')
    end_date = utils.get_date_ymd(date)  # 2016-10-18 UTC
    end_day = datetime(end_date.year, end_date.month, end_date.day)
    start_day = end_day - timedelta(days=max_days + 1)

    # Day arithmetic is done on naive datetimes and each result is localized
    # afterwards: adding a timedelta to a pytz-localized datetime keeps the
    # old UTC offset, so across a DST change the day keys would not match the
    # ones rebuilt with psttz.localize() below (KeyError).
    end_date_moz = psttz.localize(end_day)  # 2016-10-18 PST
    start_date_moz = psttz.localize(
        start_day)  # 2016-10-14 PST (max_days == 3)
    window = [
        psttz.localize(start_day + timedelta(days=i))
        for i in range(max_days + 1)
    ]  # from 2016-10-14 to 2016-10-17 PST

    end_buildid = utils.get_buildid_from_date(end_date_moz)  # < 20161018000000
    start_buildid = utils.get_buildid_from_date(
        start_date_moz)  # >= 20161014000000
    search_buildid = ['>=' + start_buildid, '<' + end_buildid]
    start_date = utils.as_utc(start_date_moz)  # 2016-10-14 07:00:00 UTC
    search_date = '>=' + utils.get_date_str(start_date)
    counts = defaultdict(lambda: defaultdict(int))

    def count_handler(json, data):
        # data: signature -> build datetime (US/Pacific) -> crash count
        if not json['errors']:
            for facets in json['facets']['build_id']:
                build_date = utils.get_date_from_buildid(
                    facets['term']).astimezone(psttz)
                for s in facets['facets']['signature']:
                    sgn = s['term']
                    count = s['count']
                    data[sgn][build_date] += count

    socorro.SuperSearch(params={
        'product': product,
        'date': search_date,
        'build_id': search_buildid,
        'release_channel': channel,
        '_aggs.build_id': 'signature',
        '_facets_size': limit,
        '_results_number': 0
    },
                        handler=count_handler,
                        handlerdata=counts).wait()

    base = {
        day: {
            'buildids': {},
            'total': 0
        }
        for day in window
    }

    # Regroup the per-build counts by calendar day of the build.
    per_day = {}
    for sgn, info in counts.items():
        d = copy.deepcopy(base)
        per_day[sgn] = d
        for bid, count in info.items():
            day = psttz.localize(datetime(bid.year, bid.month, bid.day))
            d[day]['buildids'][bid] = count
            d[day]['total'] += count

    spiking_signatures = []
    for sgn, info in per_day.items():
        totals = [info[day]['total'] for day in sorted(info)]
        if all(t == 0 for t in totals[:-1]) and totals[-1] >= threshold:
            spiking_signatures.append(sgn)

    if not spiking_signatures:
        return None

    # sort the signatures to be sure to always have the same order for the test
    spiking_signatures = sorted(spiking_signatures)

    # Only the builds of the last day of the window are queried now.
    start_buildid = utils.get_buildid_from_date(
        psttz.localize(end_day - timedelta(days=1)))
    search_buildid = ['>=' + start_buildid, '<' + end_buildid]
    data = defaultdict(list)

    def proto_handler(json, data):
        if not json['errors']:
            for facets in json['facets']['proto_signature']:
                proto = facets['term']
                count = facets['count']
                nested = facets['facets']
                sgn = nested['signature'][0]['term']
                first_uuid = nested['uuid'][0]['term']
                uuids = {i['term'] for i in nested['uuid']}
                if cache:
                    # Prefer a uuid that is already known to the cache.
                    common = uuids.intersection(cache['uuids'])
                    uuid = common.pop() if common else first_uuid
                else:
                    uuid = first_uuid
                data[sgn].append({
                    'proto': proto,
                    'uuid': uuid,
                    'count': count
                })

    queries = []
    for sgns in Connection.chunks(spiking_signatures, 5):
        queries.append(
            Query(socorro.SuperSearch.URL, {
                'product': product,
                'date': search_date,
                'build_id': search_buildid,
                'signature': ['=' + s for s in sgns],
                'release_channel': channel,
                '_aggs.proto_signature': ['uuid', 'signature'],
                '_facets_size': 10000,
                '_results_number': 0
            },
                  handler=proto_handler,
                  handlerdata=data))

    socorro.SuperSearch(queries=queries).wait()

    return data
def get_stats_for_past_weeks(product,
                             channel,
                             start_date_by_channel,
                             versions_by_channel,
                             analysis,
                             search_start_date,
                             end_date,
                             check_for_fx=True):
    """Count crashes per signature, channel and past week.

    Weeks are numbered backwards from the week containing ``end_date``
    (index 0): the index of a date is the number of whole weeks between its
    Monday and the Monday of ``end_date``.

    Args:
        product: value sent as ``product``.
        channel: iterable of channel names (iterated several times).
        start_date_by_channel: channel -> first date of interest; it fixes
            how many week slots are pre-filled with 0 for that channel.
        versions_by_channel: channel -> value sent as ``version``.
        analysis: signature -> info mapping; ``info['firefox']`` is read
            when ``check_for_fx`` is true.
        search_start_date: start of the search window; when falsy, the
            channel's own start date is used instead.
        end_date: end of the search window and reference for week 0.
        check_for_fx: when true, only signatures whose ``info['firefox']``
            is truthy are kept.

    Returns:
        ``{signature: {channel: {week_index: count}}}``.
    """
    ref_monday, _ = utils.get_monday_sunday(utils.get_date_ymd(end_date))

    def get_past_week(date):
        monday, _ = utils.get_monday_sunday(date)
        return (ref_monday - monday).days // 7

    # Per channel, a zero-filled {week_index: 0} template.
    default_trend_by_chan = {}
    for chan in channel:
        past_w = get_past_week(start_date_by_channel[chan])
        default_trend_by_chan[chan] = {i: 0 for i in range(past_w + 1)}

    trends = {}
    signatures_by_chan = {}
    for signature, info in analysis.items():
        if not check_for_fx or info['firefox']:
            data = {}
            trends[signature] = data
            for chan in channel:
                signatures_by_chan.setdefault(chan, []).append(signature)
                data[chan] = default_trend_by_chan[chan].copy()

    def handler_ss(chan, json, data):
        # NOTE(review): a bucket whose week index is not in the pre-filled
        # template raises KeyError here; this looks possible when
        # search_start_date is earlier than start_date_by_channel[chan] --
        # confirm against callers.
        for facets in json['facets']['histogram_date']:
            d = utils.get_date_ymd(facets['term'])
            w = get_past_week(d)
            for signature in facets['facets']['signature']:
                count = signature['count']
                sgn = signature['term']
                data[sgn][chan][w] += count

    queries = []
    for chan, signatures in signatures_by_chan.items():
        if search_start_date:
            search_date = socorro.SuperSearch.get_search_date(
                search_start_date, end_date)
        else:
            search_date = socorro.SuperSearch.get_search_date(
                utils.get_date_str(start_date_by_channel[chan]), end_date)

        vers = versions_by_channel[chan]
        # One query per chunk of 10 signatures, all filling `trends`.
        for sgns in Connection.chunks(signatures, 10):
            queries.append(
                Query(socorro.SuperSearch.URL, {
                    'signature': ['=' + s for s in sgns],
                    'product': product,
                    'version': vers,
                    'release_channel': chan,
                    'date': search_date,
                    '_histogram.date': 'signature',
                    '_results_number': 0
                },
                      handler=functools.partial(handler_ss, chan),
                      handlerdata=trends))
    socorro.SuperSearch(queries=queries).wait()

    return trends