Example #1
def get_bugs(signature, wait=True):
    """Return a dict mapping bug id => bug info (None means security bug).

    When wait is False, return the pending Bugzilla query together with the
    data collected so far instead of blocking.
    """

    if not signature:
        return {}

    logger.info("Get bugs for signature {}: started.".format(signature))

    def bug_handler(bug, data):
        if "cf_crash_signature" in bug:
            if signature in utils.get_signatures([bug["cf_crash_signature"]]):
                data[bug["id"]] = bug
            del bug["cf_crash_signature"]

    start_date = pytz.utc.localize(datetime.utcnow())
    start_date -= relativedelta(hours=2)
    data = {}
    bz = Bugzilla(get_bz_search(signature, start_date),
                  bughandler=bug_handler,
                  bugdata=data).get_data()

    bugs = socorro.Bugs.get_bugs([signature])[signature]
    bz.wait()
    bz_bugs = set(data.keys())

    old_bugs = []
    for bug in bugs:
        if bug not in bz_bugs:
            old_bugs.append(bug)
            # The bug is known to Socorro but was not returned by the Bugzilla
            # search query.
            data[bug] = None

    bz = Bugzilla(bugids=old_bugs,
                  include_fields=BZ_FIELDS,
                  bughandler=bug_handler,
                  bugdata=data)
    if wait:
        bz.wait()
        logger.info("Get bugs: finished.")
        return data

    logger.info("Get bugs: finished.")

    return bz, data
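
A minimal usage sketch for the helper above (not part of the original example): the crash signature is an arbitrary placeholder, and it assumes get_bugs() is importable from its module.

# Minimal usage sketch; 'OOM | small' is only a placeholder signature.
bug_info = get_bugs('OOM | small')          # blocking: dict of bugid => buginfo
security_ids = [bugid for bugid, info in bug_info.items() if info is None]

# Non-blocking variant: get the pending Bugzilla query back and wait for it later.
pending, partial = get_bugs('OOM | small', wait=False)
pending.wait()                              # 'partial' is filled in by bug_handler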
Example #2
def get(channel,
        date,
        product='Firefox',
        duration=11,
        tc_limit=50,
        crash_type='all',
        startup=False):
    """Get crashes info

    Args:
        channel (str): the channel
        date (str): the final date
        product (Optional[str]): the product
        duration (Optional[int]): the duration to retrieve the data
        tc_limit (Optional[int]): the number of topcrashes to load
        crash_type (Optional[str]): 'all' (default), 'browser', 'content' or 'plugin'
        startup (Optional[bool]): if True, limit the search to startup crashes

    Returns:
        dict: contains all the info relative to the crashes
    """
    channel = channel.lower()
    version = v[channel]  # `v` maps a channel name to its current version (module-level)
    sys.stdout.write('Getting version information from Socorro...')
    sys.stdout.flush()
    versions_info = socorro.ProductVersions.get_version_info(version,
                                                             channel=channel,
                                                             product=product)
    versions = versions_info.keys()
    platforms = socorro.Platforms.get_cached_all()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    if crash_type and isinstance(crash_type, six.string_types):
        crash_type = [crash_type]

    throttle = set(map(lambda p: p[1], versions_info.values()))
    if len(throttle) == 1:
        throttle = throttle.pop()
    else:
        return

    _date = utils.get_date_ymd(date)
    start_date = utils.get_date_str(_date - timedelta(duration - 1))
    end_date = utils.get_date_str(_date)

    # First, we get the ADI
    sys.stdout.write('Getting ADI from Socorro...')
    sys.stdout.flush()
    adi = socorro.ADI.get(version=versions,
                          product=product,
                          end_date=end_date,
                          duration=duration,
                          platforms=platforms)
    adi = [adi[key] for key in sorted(adi.keys(), reverse=True)]
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # get the khours
    sys.stdout.write('Getting khours from Re:dash...')
    sys.stdout.flush()
    khours = Redash.get_khours(utils.get_date_ymd(start_date),
                               utils.get_date_ymd(end_date), channel, versions,
                               product)
    khours = [khours[key] for key in sorted(khours.keys(), reverse=True)]
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    overall_crashes_by_day = []
    signatures = {}

    def signature_handler(json):
        for signature in json['facets']['signature']:
            signatures[signature['term']] = [signature['count'], 0, 0, 0, 0, 0]

            for platform in signature['facets']['platform']:
                if platform['term'] == 'Linux':
                    signatures[signature['term']][3] = platform['count']
                elif platform['term'] == 'Windows NT':
                    signatures[signature['term']][1] = platform['count']
                elif platform['term'] == 'Mac OS X':
                    signatures[signature['term']][2] = platform['count']

            # XXX: Remove this once all versions have the StartupCrash annotation.
            if version >= 51:
                for startup_crash in signature['facets']['startup_crash']:
                    if startup_crash['term'] in ['1', 'T']:
                        signatures[
                            signature['term']][4] += startup_crash['count']
            else:
                for uptime in signature['facets']['histogram_uptime']:
                    if uptime['term'] == 0:
                        signatures[signature['term']][4] = uptime['count']
                        break

            signatures[signature['term']][5] = signature['facets'][
                'cardinality_install_time']['value']

        for facets in json['facets']['histogram_date']:
            overall_crashes_by_day.insert(0, facets['count'])

    params = {
        'product': product,
        'version': versions,
        'date': socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel': channel,
        '_aggs.signature': [
            'platform', '_histogram.uptime', '_cardinality.install_time',
            'startup_crash'
        ],
        '_results_number': 0,
        '_facets_size': tc_limit,
        '_histogram.date': ['product'],
        '_histogram_interval': 1,
        '_histogram_interval.uptime': 60,
    }

    if startup:
        # XXX: Remove this once all versions have the StartupCrash annotation.
        if version >= 51:
            params['startup_crash'] = True
        else:
            params['uptime'] = '<=60'

    sys.stdout.write('Getting top signatures from Socorro...')
    sys.stdout.flush()
    socorro.SuperSearch(params=params, handler=signature_handler).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    bug_flags = [
        'resolution', 'id', 'last_change_time',
        'cf_tracking_firefox' + str(version)
    ]
    for i in range(int(version), int(v['nightly']) + 1):
        bug_flags.append('cf_status_firefox' + str(i))

    # TODO: too many requests; this should be improved with chunking.
    bugs = {}
    # TODO: Use regexps once the Bugzilla bug that prevents them from working is fixed.
    base = {
        'j_top': 'OR',
        'o1': 'substring',
        'f1': 'cf_crash_signature',
        'v1': None,
        'o2': 'substring',
        'f2': 'cf_crash_signature',
        'v2': None,
        'o3': 'substring',
        'f3': 'cf_crash_signature',
        'v3': None,
        'o4': 'substring',
        'f4': 'cf_crash_signature',
        'v4': None,
        'include_fields': bug_flags
    }

    queries = []
    for sgn in signatures.keys():
        cparams = base.copy()
        cparams['v1'] = '[@' + sgn + ']'
        cparams['v2'] = '[@ ' + sgn + ' ]'
        cparams['v3'] = '[@ ' + sgn + ']'
        cparams['v4'] = '[@' + sgn + ' ]'
        bugs[sgn] = []
        queries.append(
            Query(Bugzilla.API_URL, cparams, __bug_handler, bugs[sgn]))
    res_bugs = Bugzilla(queries=queries)

    # We already have the per-signature stats in `signatures`; now, for each
    # signature, get the number of crashes per day over the period to build
    # the signature trend.
    trends = {}
    default_trend = {}
    for i in range(duration):
        default_trend[_date - timedelta(i)] = 0

    base = {
        'product': product,
        'version': versions,
        'signature': None,
        'date': socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel': channel,
        '_results_number': 0,
        '_histogram.date': ['signature'],
        '_histogram_interval': 1
    }

    queries = []
    for sgns in Connection.chunks(
            list(map(lambda sgn: '=' + sgn, signatures.keys())), 10):
        sgn_group = []
        for sgn in sgns:
            if sum(len(s) for s in sgn_group) >= 1000:
                cparams = base.copy()
                cparams['signature'] = sgn_group
                queries.append(
                    Query(socorro.SuperSearch.URL, cparams,
                          functools.partial(__trend_handler, default_trend),
                          trends))
                sgn_group = []

            sgn_group.append(sgn)

        if len(sgn_group) > 0:
            cparams = base.copy()
            cparams['signature'] = sgn_group
            queries.append(
                Query(socorro.SuperSearch.URL, cparams,
                      functools.partial(__trend_handler, default_trend),
                      trends))

    sys.stdout.write('Getting trends for top signatures from Socorro...')
    sys.stdout.flush()
    socorro.SuperSearch(queries=queries).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    for sgn, trend in trends.items():
        signatures[sgn] = (signatures[sgn], [
            trend[key] for key in sorted(trend.keys(), reverse=True)
        ])

    _signatures = {}
    # Rank the signatures by crash count (descending).
    ranked = sorted(signatures.items(), key=lambda x: x[1][0][0], reverse=True)
    for i, (sgn, _) in enumerate(ranked, start=1):
        _signatures[sgn] = i  # top crash rank

    sys.stdout.write(
        'Getting bugs linked to the top signatures from Bugzilla...')
    sys.stdout.flush()
    res_bugs.wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # TODO: In the first query to get the bugs, also get dupe_of and avoid the first query
    #       in follow_dup (so modify follow_dup to accept both a bug ID or a bug object).
    queries = []
    for sgn in signatures.keys():
        duplicate_ids = [
            bug['id'] for bug in bugs[sgn] if bug['resolution'] == 'DUPLICATE'
        ]

        # Remove bugs resolved as DUPLICATE from the list of bugs associated to the signature.
        bugs[sgn] = [
            bug for bug in bugs[sgn] if bug['id'] not in duplicate_ids
        ]

        # Find duplicates for bugs resolved as DUPLICATE.
        duplicates = {
            k: v
            for k, v in Bugzilla.follow_dup(duplicate_ids).items()
            if v is not None
        }
        duplicate_targets = [
            bug_id for bug_id in duplicates.values()
            if int(bug_id) not in [bug['id'] for bug in bugs[sgn]]
        ]
        if len(duplicate_targets) == 0:
            continue

        # Get info about bugs that the DUPLICATE bugs have been duped to.
        params = {
            'id': ','.join(duplicate_targets),
            'include_fields': bug_flags,
        }
        queries.append(
            Query(Bugzilla.API_URL, params, __bug_handler, bugs[sgn]))
    sys.stdout.write(
        'Resolving duplicate bugs to the bugs they\'ve been duplicated to...')
    sys.stdout.flush()
    Bugzilla(queries=queries).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    for sgn, stats in signatures.items():
        # stats is a 2-tuple:
        # ([count, win_count, mac_count, linux_count, startup_count, install_count], trend)
        startup_percent = float(stats[0][4]) / float(stats[0][0])
        _signatures[sgn] = {
            'tc_rank': _signatures[sgn],
            'crash_count': stats[0][0],
            'estimated_user_count': stats[0][5],
            'startup_percent': startup_percent,
            'crash_by_day': stats[1],
            'bugs': bugs[sgn]
        }

    return {
        'start_date': start_date,
        'end_date': end_date,
        'versions': list(versions),
        'adi': adi,
        'khours': khours,
        'crash_by_day': overall_crashes_by_day,
        'signatures': _signatures,
        'throttle': float(throttle)
    }
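
A minimal calling sketch for get() (not part of the original example): the channel and date values are placeholders, and the None check covers the early return taken when the version info reports more than one throttle value.

# Minimal usage sketch; channel/date values are placeholders.
stats = get('release', '2017-01-15', duration=7, tc_limit=20)
if stats is not None:  # get() returns None when the throttle is ambiguous
    for sgn, info in stats['signatures'].items():
        print(sgn, info['tc_rank'], info['crash_count'], len(info['bugs']))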