Beispiel #1
0
def reduce_set_of_bugs(bugs_by_signature):
    """Drop bugs that are duplicates of another bug listed for the same signature.

    For example, with {1, 2, 3, 4, 5} where 2 is a dup of 5, the set is
    reduced to {1, 3, 4, 5}.

    Args:
        bugs_by_signature (dict): maps a signature to an iterable of bug ids.
            It is updated in place: each value is replaced by the list of
            bug ids that were kept for that signature.

    Returns:
        tuple: (set of every bug id kept, sum of the per-signature counts)
    """
    all_ids = set()
    for ids in bugs_by_signature.values():
        all_ids.update(ids)

    # NOTE(review): presumably maps str(bug id) to the chain of bugs it has
    # been duped to (falsy when the bug is not a dup) -- confirm against
    # Bugzilla.follow_dup.
    dup_chains = Bugzilla.follow_dup(all_ids, only_final=False)

    kept = set()
    total = 0
    for signature, bugids in bugs_by_signature.items():
        candidates = set(bugids)
        dropped = set()
        for bugid in bugids:
            chain = dup_chains[str(bugid)]
            if not chain:
                continue
            in_signature = [e for e in map(int, chain) if e in candidates]
            if in_signature:
                # The last chain member listed for this signature survives;
                # the earlier ones and the bug that started the chain go.
                dropped.update(in_signature[:-1])
                dropped.add(bugid)
        remaining = candidates - dropped
        bugs_by_signature[signature] = list(remaining)
        total += len(remaining)
        kept.update(remaining)

    return kept, total
def reduce_set_of_bugs(bugs_by_signature):
    """Drop bugs that are duplicates of another bug listed for the same signature.

    For example, with {1, 2, 3, 4, 5} where 2 is a dup of 5, the set is
    reduced to {1, 3, 4, 5}.

    Args:
        bugs_by_signature (dict): maps a signature to an iterable of bug ids.
            It is updated in place: each value is replaced by the list of
            bug ids that were kept for that signature.

    Returns:
        tuple: (set of every bug id kept, sum of the per-signature counts)
    """
    all_ids = set()
    for ids in bugs_by_signature.values():
        all_ids.update(ids)

    # NOTE(review): presumably maps str(bug id) to the chain of bugs it has
    # been duped to (falsy when the bug is not a dup) -- confirm against
    # Bugzilla.follow_dup.
    dup_chains = Bugzilla.follow_dup(all_ids, only_final=False)

    kept = set()
    total = 0
    for signature, bugids in bugs_by_signature.items():
        candidates = set(bugids)
        dropped = set()
        for bugid in bugids:
            chain = dup_chains[str(bugid)]
            if not chain:
                continue
            in_signature = [e for e in map(int, chain) if e in candidates]
            if in_signature:
                # The last chain member listed for this signature survives;
                # the earlier ones and the bug that started the chain go.
                dropped.update(in_signature[:-1])
                dropped.add(bugid)
        remaining = candidates - dropped
        bugs_by_signature[signature] = list(remaining)
        total += len(remaining)
        kept.update(remaining)

    return kept, total
Beispiel #3
0
def get(channel,
        date,
        product='Firefox',
        duration=11,
        tc_limit=50,
        crash_type='all',
        startup=False):
    """Get crashes info

    Loads the top crash signatures for the channel from Socorro, their
    per-day counts over the requested period, and the Bugzilla bugs whose
    cf_crash_signature field mentions each signature. Bugs resolved as
    DUPLICATE are replaced by the bug they have been duped to.

    Args:
        channel (str): the channel
        date (str): the final date
        product (Optional[str]): the product
        duration (Optional[int]): the duration (in days) to retrieve the data
        tc_limit (Optional[int]): the number of topcrashes to load
        crash_type (Optional[str]): 'all' (default) or 'browser' or 'content' or 'plugin'.
            NOTE(review): the value is normalised to a list but no query
            below reads it, so it currently has no effect.
        startup (Optional[bool]): when True, 'startup_crash' is added to the
            Socorro top-signatures search parameters

    Returns:
        dict: contains all the info relative to the crashes, with keys
            'start_date', 'end_date', 'versions' and 'signatures'. Each
            signature maps to a dict with 'tc_rank', 'crash_count',
            'estimated_user_count', 'startup_percent' (startup crashes
            divided by all crashes, not multiplied by 100) and 'bugs'.
    """
    def report(text):
        # Write and flush right away so progress shows while requests run.
        sys.stdout.write(text)
        sys.stdout.flush()

    channel = channel.lower()
    version = v[channel]
    report('Getting version information from Socorro...')
    versions = versions_util.get_channel_versions(channel, product)
    report(' ✔\n')

    # NOTE(review): crash_type is normalised here but never used afterwards.
    if crash_type and isinstance(crash_type, six.string_types):
        crash_type = [crash_type]

    _date = utils.get_date_ymd(date)
    start_date = utils.get_date_str(_date - timedelta(duration - 1))
    end_date = utils.get_date_str(_date)

    # signature -> [count, win_count, mac_count, linux_count, startup_count,
    #               install_time cardinality]
    signatures = {}
    platform_slots = {'Windows NT': 1, 'Mac OS X': 2, 'Linux': 3}

    def signature_handler(json):
        for signature in json['facets']['signature']:
            stats = [signature['count'], 0, 0, 0, 0, 0]
            signatures[signature['term']] = stats
            facets = signature['facets']

            for platform in facets['platform']:
                slot = platform_slots.get(platform['term'])
                if slot is not None:
                    stats[slot] = platform['count']

            for startup_crash in facets['startup_crash']:
                if startup_crash['term'] in ('1', 'T'):
                    stats[4] += startup_crash['count']

            stats[5] = facets['cardinality_install_time']['value']

    params = {
        'product': product,
        'version': versions,
        'date': socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel': channel,
        '_aggs.signature':
        ['platform', '_cardinality.install_time', 'startup_crash'],
        '_results_number': 0,
        '_facets_size': tc_limit,
    }

    if startup:
        params['startup_crash'] = True

    report('Getting top signatures from Socorro...')
    socorro.SuperSearch(params=params, handler=signature_handler).wait()
    report(' ✔\n')

    bug_flags = [
        'resolution', 'id', 'last_change_time',
        'cf_tracking_firefox' + str(version)
    ]
    bug_flags.extend('cf_status_firefox' + str(i)
                     for i in range(int(version), int(v['nightly']) + 1))

    # TODO: too many requests... should be improved with chunks
    bugs = {}
    # TODO: Use regexp, when the Bugzilla bug that prevents them from working will be fixed.
    bug_base = {
        'j_top': 'OR',
        'o1': 'substring',
        'f1': 'cf_crash_signature',
        'v1': None,
        'o2': 'substring',
        'f2': 'cf_crash_signature',
        'v2': None,
        'o3': 'substring',
        'f3': 'cf_crash_signature',
        'v3': None,
        'o4': 'substring',
        'f4': 'cf_crash_signature',
        'v4': None,
        'include_fields': bug_flags
    }

    queries = []
    for sgn in signatures:
        cparams = bug_base.copy()
        # The four spacing variants of "[@ signature ]" are matched
        # explicitly because only substring search is used (see TODO above).
        cparams['v1'] = '[@' + sgn + ']'
        cparams['v2'] = '[@ ' + sgn + ' ]'
        cparams['v3'] = '[@ ' + sgn + ']'
        cparams['v4'] = '[@' + sgn + ' ]'
        bugs[sgn] = []
        queries.append(
            Query(Bugzilla.API_URL, cparams, __bug_handler, bugs[sgn]))
    # Not waited for here: the results are collected further down, after the
    # Socorro trend queries.
    res_bugs = Bugzilla(queries=queries)

    # we have stats by signature in self.signatures
    # for each signature get the number of crashes on the last X days
    # so get the signature trend
    trends = {}
    default_trend = {_date - timedelta(i): 0 for i in range(duration)}

    trend_base = {
        'product': product,
        'version': versions,
        'signature': None,
        'date': socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel': channel,
        '_results_number': 0,
        '_histogram.date': ['signature'],
        '_histogram_interval': 1
    }

    def trend_query(group):
        # Build the Socorro histogram query for one group of signatures.
        cparams = trend_base.copy()
        cparams['signature'] = group
        return Query(socorro.SuperSearch.URL, cparams,
                     functools.partial(__trend_handler, default_trend),
                     trends)

    queries = []
    for sgns in Connection.chunks(['=' + sgn for sgn in signatures], 10):
        sgn_group = []
        for sgn in sgns:
            # Start a new query once the signatures gathered so far reach
            # 1000 characters in total.
            if sum(len(s) for s in sgn_group) >= 1000:
                queries.append(trend_query(sgn_group))
                sgn_group = []

            sgn_group.append(sgn)

        if sgn_group:
            queries.append(trend_query(sgn_group))

    report('Getting trends for top signatures from Socorro...')
    socorro.SuperSearch(queries=queries).wait()
    report(' ✔\n')

    # From here on each signature maps to a 2-uple (stats, trend), the trend
    # being ordered from the most recent day to the oldest one.
    for sgn, trend in trends.items():
        signatures[sgn] = (signatures[sgn],
                           [trend[key] for key in sorted(trend, reverse=True)])

    _signatures = {}
    # order self.signatures by crash count
    sorted_signatures = sorted(signatures.items(),
                               key=lambda x: x[1][0][0],
                               reverse=True)
    for rank, item in enumerate(sorted_signatures, 1):
        _signatures[item[0]] = rank  # top crash rank

    report('Getting bugs linked to the top signatures from Bugzilla...')
    while True:
        try:
            for r in res_bugs.results:
                r.result(timeout=2)
            break
        except TimeoutError:
            # Still waiting: print a dot and start polling again.
            report('.')
    report(' ✔\n')

    # TODO: In the first query to get the bugs, also get dupe_of and avoid the first query
    #       in follow_dup (so modify follow_dup to accept both a bug ID or a bug object).
    queries = []
    for sgn in signatures:
        duplicate_ids = [
            bug['id'] for bug in bugs[sgn] if bug['resolution'] == 'DUPLICATE'
        ]
        if not duplicate_ids:
            # No DUPLICATE bug for this signature: nothing to follow, so the
            # follow_dup lookup is skipped.
            continue

        # Remove bugs resolved as DUPLICATE from the list of bugs associated to the signature.
        duplicate_id_set = set(duplicate_ids)
        bugs[sgn] = [
            bug for bug in bugs[sgn] if bug['id'] not in duplicate_id_set
        ]

        # Find the bugs that the DUPLICATE ones have been duped to, keeping
        # only those not already listed for this signature.
        known_ids = {bug['id'] for bug in bugs[sgn]}
        duplicate_targets = [
            target
            for target in Bugzilla.follow_dup(duplicate_ids).values()
            if target is not None and int(target) not in known_ids
        ]
        if not duplicate_targets:
            continue

        # Get info about bugs that the DUPLICATE bugs have been duped to.
        params = {
            'id': ','.join(duplicate_targets),
            'include_fields': bug_flags,
        }
        queries.append(
            Query(Bugzilla.API_URL, params, __bug_handler, bugs[sgn]))
    report(
        'Resolving duplicate bugs to the bugs they\'ve been duplicated to...')
    Bugzilla(queries=queries).wait()
    report(' ✔\n')

    for sgn, stats in signatures.items():
        # stats is 2-uple: ([count, win_count, mac_count, linux_count,
        # startup_count, install_time cardinality], trend)
        counts = stats[0]
        startup_percent = float(counts[4]) / float(counts[0])
        _signatures[sgn] = {
            'tc_rank': _signatures[sgn],
            'crash_count': counts[0],
            'estimated_user_count': counts[5],
            'startup_percent': startup_percent,
            'bugs': bugs[sgn]
        }

    return {
        'start_date': start_date,
        'end_date': end_date,
        'versions': list(versions),
        'signatures': _signatures,
    }