Example #1
0
def get_buildids_from_socorro(search_buildid, search_date, product):
    """Get the builds from socorro for nightly channel.

    For other channels we use the database (fed with buildhub data).

    Args:
        search_buildid: sent as the ``build_id`` search parameter.
        search_date: sent as the ``date`` search parameter.
        product: sent as the ``product`` search parameter.

    Returns:
        A sorted list holding the ``term`` of every ``build_id`` facet
        received; nothing is collected from a response that reports errors
        or has no ``build_id`` facet.
    """
    def handler(json, data):
        # A response flagged with errors is dropped before its facets are read.
        if json["errors"]:
            return
        build_facets = json["facets"]["build_id"]
        if build_facets:
            data.extend(facet["term"] for facet in build_facets)

    found = []
    query = {
        "product": product,
        "release_channel": "nightly",
        "date": search_date,
        "build_id": search_buildid,
        "_facets": "build_id",
        # Only the facets are needed, not the individual crash reports.
        "_results_number": 0,
        "_facets_size": 100,
    }
    socorro.SuperSearch(params=query, handler=handler,
                        handlerdata=found).wait()

    return sorted(found)
def get_buildids_from_socorro(search_buildid, search_date, product):
    """Get the builds from socorro for nightly channel.

       For other channels we use the database (fed with buildhub data).

       The three arguments are forwarded as the 'build_id', 'date' and
       'product' search parameters. The facet terms gathered by the handler
       are returned as a sorted list."""
    def handler(json, data):
        # Errors short-circuit: the facets are only read on a clean response.
        if not json['errors']:
            for facet in json['facets']['build_id'] or ():
                data.append(facet['term'])

    collected = []
    search = socorro.SuperSearch(
        params={
            'product': product,
            'release_channel': 'nightly',
            'date': search_date,
            'build_id': search_buildid,
            '_facets': 'build_id',
            '_results_number': 0,
            '_facets_size': 100
        },
        handler=handler,
        handlerdata=collected)
    search.wait()

    return sorted(collected)
Example #3
0
def get(channel, versions=None, product='Firefox', start_date=None, end_date='today', duration=30, platforms=None):
    """Build a per-day table seeded with ADI and zeroed crash counters.

    When ``versions`` is not a list, the active versions are looked up
    through ``socorro.ProductVersions.get_active`` (restricted to
    ``vnumber=versions`` when a number was given) and the entry for the
    lower-cased ``channel`` is used.

    When ``start_date`` is given, ``duration`` is replaced by the number of
    days between ``start_date`` and ``end_date``.

    Returns:
        A dict keyed like the ADI result; each value holds the ADI under
        'adi' plus zero-initialised counters. The same dict is handed to the
        SuperSearch handler as its data.
    """
    if not isinstance(versions, list):
        lookup = {'product': product}
        if isinstance(versions, numbers.Number):
            lookup['vnumber'] = versions
        versions = socorro.ProductVersions.get_active(**lookup)[channel.lower()]

    if start_date:
        first_day = utils.get_date_ymd(start_date)
        last_day = utils.get_date_ymd(end_date)
        duration = (last_day - first_day).days

    adi = socorro.ADI.get(version=versions, product=product, end_date=end_date, duration=duration, platforms=platforms)

    counter_names = ('browser', 'content', 'plugin', 'browser_rate', 'content_rate', 'b+c_rate', 'plugin_rate')
    data = {}
    for day, count in adi.items():
        row = {'adi': count}
        row.update((name, 0) for name in counter_names)
        data[day] = row

    # The search window is always derived from end_date and duration.
    start_date = utils.get_date(end_date, duration)
    search_params = {
        'product': product,
        'version': versions,
        'release_channel': channel,
        'date': socorro.SuperSearch.get_search_date(start_date, end_date),
        '_results_number': 0,
        '_facets_size': 2,  # 2 is for a facet on plugin and on content
        '_histogram.date': ['process_type'],
    }
    socorro.SuperSearch(params=search_params,
                        handler=__super_search_handler,
                        handlerdata=data).wait()

    return data
Example #4
0
def get(channels, product='Firefox', date='today', query=None):
    """Get the daily crash totals per release channel.

    The search window runs from 25 weeks before ``date`` up to the day
    after ``date``.

    Args:
        channels: iterable of channel names; sent as the 'release_channel'
            search parameter and used as the keys of the result.
        product: sent as the 'product' search parameter.
        date: reference day, parsed with ``utils.get_date_ymd``.
        query: optional mapping of extra search parameters merged over the
            default ones. ``None`` (the default) adds nothing.

    Returns:
        A dict ``{channel: {day: count}}`` filled from the date histogram.
    """
    today = utils.get_date_ymd(date)
    tomorrow = today + relativedelta(days=1)
    six_months_ago = today - relativedelta(weeks=25)
    search_date = socorro.SuperSearch.get_search_date(six_months_ago, tomorrow)
    data = {chan: {} for chan in channels}

    def handler(json, data):
        histogram = json['facets']['histogram_date']
        if not histogram:
            return

        for bucket in histogram:
            day = utils.get_date_ymd(bucket['term'])
            for chan in bucket['facets']['release_channel']:
                data[chan['term']][day] = chan['count']

    params = {
        'product': product,
        'date': search_date,
        'release_channel': channels,
        '_histogram.date': 'release_channel',
        '_results_number': 0
    }
    # A None default avoids sharing one mutable dict between all calls.
    if query:
        params.update(query)

    socorro.SuperSearch(params=params, handler=handler,
                        handlerdata=data).wait()

    return data
Example #5
0
def get_total(channels, product='Firefox', date='today'):
    """Get the crash count of each channel for a single day.

    The search window runs from ``date`` to the following day. Every
    channel starts at 0 and is overwritten with the count found in the
    'release_channel' sub-facet of each date-histogram bucket.

    Returns:
        A dict mapping each channel to its count.
    """
    day = utils.get_date_ymd(date)
    next_day = day + relativedelta(days=1)
    window = socorro.SuperSearch.get_search_date(day, next_day)
    totals = dict.fromkeys(channels, 0)

    def handler(json, data):
        histogram = json['facets']['histogram_date']
        if not histogram:
            return

        for bucket in histogram:
            for entry in bucket['facets']['release_channel']:
                data[entry['term']] = entry['count']

    search_params = {
        'product': product,
        'date': window,
        'release_channel': channels,
        '_histogram.date': 'release_channel',
        '_results_number': 0
    }
    search = socorro.SuperSearch(params=search_params,
                                 handler=handler,
                                 handlerdata=totals)
    search.wait()

    return totals
def analyze_gfx_critical_errors(signature='',
                                product='Firefox',
                                channel=None,
                                versions=None,
                                start_date=''):
    """Count the crashes matching each known graphics critical error.

    Args:
        signature: optional signature added to every search when non-empty.
        product: product name; any casing of 'firefox' is normalised to
            'Firefox'.
        channel: list of channel names. ``None``, an empty sequence or a
            first element equal to 'all' (case-insensitive) selects
            release, beta and nightly, plus esr for Firefox.
        versions: list of versions; when empty or ``None`` they are derived
            from ``libmozdata.versions.get(base=True)`` through
            ``socorro.ProductVersions.get_info_from_major``.
        start_date: lower bound of the search; defaults to
            ``utils.get_date('today', 7)`` when empty.

    Returns:
        A dict mapping each critical error to the 'total' of its search.
    """
    if product.lower() == 'firefox':
        product = 'Firefox'

    # None defaults replace the former mutable list defaults; the caller's
    # list is never modified because a new list is built in both branches.
    if not channel or channel[0].lower() == 'all':
        channel = ['release', 'beta', 'nightly']
        if product == 'Firefox':
            channel.append('esr')
    else:
        channel = [c.lower() for c in channel]

    if not versions:
        base_versions = libmozdata.versions.get(base=True)
        versions_by_channel = socorro.ProductVersions.get_info_from_major(
            base_versions, product=product)
        versions = [
            info['version']
            for infos in versions_by_channel.values()
            for info in infos
        ]

    if not start_date:
        start_date = utils.get_date('today', 7)

    gfx_critical_errors = get_critical_errors()

    count = {}

    def handler(json, gfx_critical_error):
        # The handler data is the error string the query was built for.
        count[gfx_critical_error] = json['total']

    base_params = {
        'product': product,
        'release_channel': channel,
        'version': versions,
        'date': '>=' + start_date,
        '_results_number': 0,
        '_facets_size': 0,
    }

    if signature:
        base_params['signature'] = signature

    queries = []
    for gfx_critical_error in gfx_critical_errors:
        params = base_params.copy()
        params['graphics_critical_error'] = '~' + gfx_critical_error
        queries.append(
            Query(socorro.SuperSearch.URL,
                  params=params,
                  handler=handler,
                  handlerdata=gfx_critical_error))

    socorro.SuperSearch(queries=queries).wait()

    return count
def get_proto_small(product, signatures, search_date, channel):
    """Get the proto-signatures for signature with a small number of crashes.

       Since we 'must' aggregate uuid on proto-signatures, to be faster we query
       several signatures at once: it's possible because we know that
       card(proto) <= card(crashes) for a given signature.

       Results are written into `signatures`: for each signature and build,
       up to `threshold` proto entries and the install count."""
    logger.info('Get proto-signatures (small) for {}-{}: started.'.format(
        product, channel))

    def handler(bid, threshold, json, data):
        proto_facets = json['facets']['proto_signature']
        if not proto_facets:
            return
        for entry in proto_facets:
            nested = entry['facets']
            sgn = nested['signature'][0]['term']
            collected = data[sgn]['protos'][bid]
            if len(collected) >= threshold:
                continue
            collected.append({
                'proto': entry['term'],
                'count': entry['count'],
                'uuid': nested['uuid'][0]['term']
            })
        for entry in json['facets']['signature']:
            sgn = entry['term']
            installs = entry['facets']['cardinality_install_time']['value']
            # An install count of zero is recorded as one.
            data[sgn]['installs'][bid] = installs if installs != 0 else 1

    limit = config.get_limit_facets()
    threshold = config.get_threshold('protos', product, channel)
    base_params = {
        'product': product,
        'release_channel': utils.get_search_channel(channel),
        'date': search_date,
        'build_id': '',
        'signature': '',
        '_aggs.proto_signature': ['uuid', 'signature'],
        '_aggs.signature': '_cardinality.install_time',
        '_results_number': 0,
        '_facets': 'release_channel',
        '_facets_size': limit
    }

    for bid, all_signatures in utils.get_sgns_by_bids(signatures).items():
        bid_params = copy.deepcopy(base_params)
        bid_params['build_id'] = utils.get_buildid(bid)
        on_response = functools.partial(handler, bid, threshold)
        queries = []
        # Signatures are queried ten at a time.
        for chunk in Connection.chunks(all_signatures, 10):
            chunk_params = copy.deepcopy(bid_params)
            chunk_params['signature'] = ['=' + s for s in chunk]
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=chunk_params,
                      handler=on_response,
                      handlerdata=signatures))

        # One batch of queries per build id.
        socorro.SuperSearch(queries=queries).wait()

    logger.info('Get proto-signatures (small) for {}-{}: finished.'.format(
        product, channel))
Example #8
0
def filter_nightly_buildids(buildids):
    """Drop, in place, the nightly builds with too few crashes.

    For 'Firefox' and 'FennecAndroid', ``buildids[prod]['nightly']`` is
    replaced by the entries whose build reached the count threshold from
    ``config.get_min_total(prod, 'nightly')``. The builds located after the
    last one that reached the threshold are always kept.

    Each entry of ``buildids[prod]['nightly']`` is indexed with ``[0]`` to
    get the build date (an object with ``strftime``).
    """
    def handler(threshold, json, data):
        for facet in json['facets']['build_id'] or ():
            if facet['count'] >= threshold:
                data[utils.get_build_date(facet['term'])] = True

    base_params = {
        'product': '',
        'build_id': '',
        'date': '',
        'release_channel': 'nightly',
        '_aggs.build_id': 'release_channel',
        '_results_number': 0,
        '_facets': 'release_channel',
        '_facets_size': 1000
    }

    reached = {}
    queries = []
    for prod in ('Firefox', 'FennecAndroid'):
        threshold = config.get_min_total(prod, 'nightly')
        on_response = functools.partial(handler, threshold)
        reached[prod] = flags = {}
        for chunk in Connection.chunks(buildids[prod]['nightly'],
                                       chunk_size=128):
            chunk_params = copy.deepcopy(base_params)
            chunk_params['product'] = prod
            # The search starts at the date of the first build of the chunk.
            chunk_params['date'] = '>=' + chunk[0][0].strftime('%Y-%m-%d')
            ids = []
            for build in chunk:
                ids.append(utils.get_buildid(build[0]))
                # Every build is "not reached" until the handler says otherwise.
                flags[build[0]] = False
            chunk_params['build_id'] = ids

            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=chunk_params,
                      handler=on_response,
                      handlerdata=flags))

    socorro.SuperSearch(queries=queries).wait()

    for prod, flags in reached.items():
        builds = buildids[prod]['nightly']
        passed = [flags[build[0]] for build in builds]
        # Index just past the last build that reached the threshold;
        # everything from there on is kept unconditionally.
        tail = len(passed)
        while tail > 0 and not passed[tail - 1]:
            tail -= 1
        buildids[prod]['nightly'] = [
            build for pos, build in enumerate(builds)
            if pos >= tail or passed[pos]
        ]
Example #9
0
def get_sgns_for_doubloons(doubloons, signatures, search_date, base_data):
    """Fill the raw and install counts for builds listed in ``doubloons``.

    ``doubloons`` maps a build id to (product, channel, version) triples;
    one query is issued per triple. The counts are written into
    ``base_data[product][channel]``; nothing is returned.
    """
    if not doubloons:
        return None

    limit = 50
    nbase = [0, 0]

    def handler(bid, json, data):
        sgn_facets = json['facets']['signature']
        if not sgn_facets:
            return
        for entry in sgn_facets:
            sgn = entry['term']
            nested = entry['facets']
            per_build = data[sgn]
            if isinstance(per_build, list):
                # First hit for this signature: turn the list of builds
                # into a dict holding a fresh counter pair per build.
                per_build = {b: copy.copy(nbase) for b in per_build}
                data[sgn] = per_build
            if bid not in per_build:
                continue
            stats = per_build[bid]
            stats[RAW] = entry['count']
            installs = len(nested['install_time'])
            if installs == limit:
                # As many facets as the facet size limit: use the
                # cardinality aggregation instead of the facet count.
                installs = nested['cardinality_install_time']['value']
            stats[INSTALLS] = installs

    base_params = {
        'build_id': '',
        'product': '',
        'release_channel': '',
        'signature': ['=' + s for s in signatures],
        'date': search_date,
        '_aggs.signature': ['install_time', '_cardinality.install_time'],
        '_results_number': 0,
        '_facets': 'product',
        '_facets_size': limit
    }

    queries = []
    for raw_bid, pcvs in doubloons.items():
        bid_params = copy.deepcopy(base_params)
        bid_params['build_id'] = raw_bid
        build_date = utils.get_build_date(raw_bid)
        for prod, chan, ver in pcvs:
            params = copy.deepcopy(bid_params)
            params['product'] = prod
            params['release_channel'] = chan
            params['version'] = ver
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=params,
                      handler=functools.partial(handler, build_date),
                      handlerdata=base_data[prod][chan]))

    socorro.SuperSearch(queries=queries).wait()
Example #10
0
def get_signatures(channels,
                   product='Firefox',
                   date='today',
                   query=None,
                   ndays=7,
                   N=50):
    """Get the top signatures per channel from daily crash counts.

    The search window covers the ``ndays`` days before ``date`` up to the
    day after ``date``. One search is started per channel; all of them are
    started before any is waited on.

    Args:
        channels: iterable of channel names.
        product: sent as the 'product' search parameter.
        date: reference day, parsed with ``utils.get_date_ymd``.
        query: optional mapping of extra search parameters merged over the
            default ones. ``None`` (the default) adds nothing.
        ndays: number of days to look back.
        N: forwarded to ``get_top_signatures``.

    Returns:
        Whatever ``get_top_signatures(data, product, N=N)`` returns.
    """
    today = utils.get_date_ymd(date)
    tomorrow = today + relativedelta(days=1)
    few_days_ago = today - relativedelta(days=ndays)
    search_date = socorro.SuperSearch.get_search_date(few_days_ago, tomorrow)
    # Every signature starts with a zero count for each day of the window.
    base = {few_days_ago + relativedelta(days=i): 0 for i in range(ndays + 1)}
    data = {chan: defaultdict(lambda: copy.copy(base)) for chan in channels}

    def handler(skip_pats, json, data):
        if json['errors'] or not json['facets']['histogram_date']:
            return

        for facets in json['facets']['histogram_date']:
            day = utils.get_date_ymd(facets['term'])
            for signature in facets['facets']['signature']:
                sgn = signature['term']
                # Match the signature string: the facet entry itself is a
                # dict, which a pattern's match() cannot handle.
                # NOTE(review): assumes skip_pats holds compiled regexes.
                if any(p.match(sgn) for p in skip_pats):
                    continue
                data[sgn][day] += signature['count']

    params = {
        'product': product,
        'date': search_date,
        'release_channel': '',
        '_histogram.date': 'signature',
        '_results_number': 0,
        '_facets_size': 10000
    }
    # A None default avoids sharing one mutable dict between all calls.
    if query:
        params.update(query)

    searches = []
    for chan in channels:
        skip_pats = config.get_skiplist_channel(chan)
        params = copy.deepcopy(params)
        params['release_channel'] = chan
        hdler = functools.partial(handler, skip_pats)
        searches.append(
            socorro.SuperSearch(params=params,
                                handler=hdler,
                                handlerdata=data[chan]))

    for s in searches:
        s.wait()

    for chan in channels:
        gather(data[chan])

    return get_top_signatures(data, product, N=N)
Example #11
0
    def test_search(self):
        """Run an '~OOM' search on Firefox and merge the response into a dict."""
        def merge_response(json, data):
            data.update(json)

        collected = {}
        search_params = {
            'product': 'Firefox',
            'signature': '~OOM',
            '_columns': ['uuid', 'build_id'],
            '_results_number': 0,
            '_facets': ['product'],
        }
        search = socorro.SuperSearch(params=search_params,
                                     handler=merge_response,
                                     handlerdata=collected)
        search.wait()

        self.assertIsNotNone(collected)
Example #12
0
def get_fenix_buildids(channels):
    """Get, per channel, the Fenix build ids of the last three months.

    Returns:
        A dict mapping a channel to a list of ``[build id, version]`` pairs
        sorted by build id, the version being the one with the highest
        count for that build. A channel whose response has no 'build_id'
        facet gets no entry.
    """
    # We don't have build info for Fenix on buildhub
    # so we get them from crash-stats
    oldest = lmdutils.get_date_ymd('today') - relativedelta(months=3)
    oldest_bid = oldest.strftime('%Y%m%d000000')
    oldest_day = oldest.strftime('%Y-%m-%d')

    base_params = {
        'product': ['Fenix'],
        'date': '>=' + oldest_day,
        'build_id': '>=' + oldest_bid,
        '_aggs.build_id': 'version',
        '_results_number': 0,
        '_facets': 'release_channel',
        '_facets_size': 1000,
    }

    def handler(chan, json, data):
        build_facets = json['facets']['build_id']
        if not build_facets:
            return

        is_release = chan == 'release'
        version_by_bid = {}
        for facet in build_facets:
            top = max(facet['facets']['version'], key=lambda v: v['count'])
            # Workaround to remove "noise" in release channel.
            # Anyway all this stuff will be removed once we have
            # build data on buildhub.
            if is_release and top['count'] < 200:
                continue
            version_by_bid[str(facet['term'])] = top['term']

        data[chan] = [list(pair) for pair in sorted(version_by_bid.items())]

    results = {}
    queries = [
        Query(socorro.SuperSearch.URL,
              params=dict(base_params, release_channel=chan),
              handler=functools.partial(handler, chan),
              handlerdata=results)
        for chan in channels
    ]

    search = socorro.SuperSearch(queries=queries)
    search.wait()
    search.session.close()

    return results
Example #13
0
    def test_bugs(self):
        """Fetch two '~OOM' signatures, then look up their bugs."""
        def keep_first_two(json, data):
            sgn_facets = json['facets']['signature']
            data.extend([sgn_facets[0]['term'], sgn_facets[1]['term']])

        found = []
        search_params = {
            'product': 'Firefox',
            'signature': '~OOM',
            '_results_number': 0,
            '_facets': ['signature'],
        }
        socorro.SuperSearch(params=search_params,
                            handler=keep_first_two,
                            handlerdata=found).wait()

        self.assertEqual(len(found), 2)
        self.assertIsNotNone(socorro.Bugs.get_bugs(found))
Example #14
0
def get_proto_big(product, signatures, search_date, channel):
    """Get proto-signatures for signatures which have a high # of crashes (>=500).

    One query is issued per signature and the queries are run in one batch
    per build id. Results are written into ``signatures[sgn]``: the install
    count and up to ``threshold`` proto entries for the build.
    """
    logger.info("Get proto-signatures (big) for {}-{}: started.".format(
        product, channel))

    def handler(bid, threshold, json, data):
        proto_facets = json["facets"]["proto_signature"]
        if not proto_facets:
            return
        installs = json["facets"]["cardinality_install_time"]["value"]
        # An install count of zero is recorded as one.
        data["installs"][bid] = installs if installs != 0 else 1
        for entry in proto_facets:
            collected = data["protos"][bid]
            if len(collected) >= threshold:
                continue
            collected.append({
                "proto": entry["term"],
                "count": entry["count"],
                "uuid": entry["facets"]["uuid"][0]["term"],
            })

    threshold = config.get_threshold("protos", product, channel)
    base_params = {
        "product": product,
        "release_channel": utils.get_search_channel(channel),
        "date": search_date,
        "build_id": "",
        "signature": "",
        "_aggs.proto_signature": "uuid",
        "_results_number": 0,
        "_facets": "_cardinality.install_time",
        "_facets_size": threshold,
    }

    for bid, all_signatures in utils.get_sgns_by_bids(signatures).items():
        bid_params = copy.deepcopy(base_params)
        bid_params["build_id"] = utils.get_buildid(bid)
        on_response = functools.partial(handler, bid, threshold)
        queries = []
        for sgn in all_signatures:
            sgn_params = copy.deepcopy(bid_params)
            sgn_params["signature"] = "=" + sgn
            queries.append(
                Query(
                    socorro.SuperSearch.URL,
                    params=sgn_params,
                    handler=on_response,
                    handlerdata=signatures[sgn],
                ))

        socorro.SuperSearch(queries=queries).wait()

    logger.info("Get proto-signatures (big) for {}-{}: finished.".format(
        product, channel))
Example #15
0
    def test_processed(self):
        """Fetch two '~OOM' crash uuids, then load their processed crashes."""
        def keep_first_two(json, data):
            hits = json['hits']
            data.extend([hits[0]['uuid'], hits[1]['uuid']])

        uuids = []
        search_params = {
            'product': 'Firefox',
            'signature': '~OOM',
            '_columns': ['uuid'],
            '_results_number': 2,
            '_facets': ['product'],
        }
        socorro.SuperSearch(params=search_params,
                            handler=keep_first_two,
                            handlerdata=uuids).wait()

        self.assertEqual(len(uuids), 2)
        self.assertIsNotNone(socorro.ProcessedCrash.get_processed(uuids))
def filter_nightly_buildids_helper(fa_bids, fx_bids):
    """Filter the nightly builds to keep only the relevant ones.

    The search built by ``get_filter_query`` feeds ``filter_by_crashes_num``
    with one set per product; each list of builds is then passed through
    ``get_useful_bids`` with its product's set.

    Returns:
        The filtered ``(fa_bids, fx_bids)`` pair.
    """
    per_product = {'Firefox': set(), 'FennecAndroid': set()}
    search = socorro.SuperSearch(
        params=get_filter_query(fa_bids, fx_bids),
        handler=filter_by_crashes_num,
        handlerdata=per_product,
    )
    search.wait()

    useful_fa = get_useful_bids(fa_bids, per_product['FennecAndroid'])
    useful_fx = get_useful_bids(fx_bids, per_product['Firefox'])

    return useful_fa, useful_fx
Example #17
0
def get(product, channel, date='yesterday'):
    """Get the throttle-scaled crash counts of every report for one day.

    One query is built per entry of the module-level ``reports`` mapping
    (desktop-only reports are skipped unless the product is 'Firefox').

    Returns:
        A dict ``{histogram term: {report name: value}}`` where the value is
        either a scaled total or, for reports with 'process_split', a dict
        of scaled totals per process type.
    """
    yesterday = utils.get_date_ymd(date)
    versions, throttle = magutils.get_versions(yesterday, product, channel)

    def handler(rep, catname, json, data):
        if json['errors'] or not json['facets']['histogram_date']:
            return {}

        for bucket in json['facets']['histogram_date']:
            total = bucket['count']
            day = bucket['term']
            if not rep['process_split']:
                data[day][catname] = int(total * throttle)
                continue

            process_types = bucket['facets']['process_type']
            by_process = defaultdict(int)
            counted = 0
            for entry in process_types:
                by_process[entry['term']] += int(entry['count'] * throttle)
                counted += entry['count']
            # Whatever is not listed under a process type goes to 'browser'.
            by_process['browser'] += int((total - counted) * throttle)
            data[day][catname] = dict(by_process)

    results = defaultdict(dict)
    today = yesterday + timedelta(days=1)
    search_date = socorro.SuperSearch.get_search_date(yesterday, today)
    queries = []
    for catname, rep in reports.items():
        if product != 'Firefox' and rep['desktoponly']:
            continue
        params = {
            'product': product,
            'version': versions,
            'date': search_date,
            'release_channel': channel,
            '_histogram.date': 'process_type',
            '_facets_size': 5,
            '_results_number': 0
        }
        # Report-specific parameters override the common ones.
        params.update(rep['params'])
        on_response = functools.partial(handler, rep, catname)
        queries.append(
            Query(socorro.SuperSearch.URL,
                  params,
                  handler=on_response,
                  handlerdata=results))

    socorro.SuperSearch(queries=queries).wait()

    return dict(results)
Example #18
0
def get_proto_big(product, signatures, search_date, channel):
    """Get proto-signatures for signatures which have a high # of crashes (>=500).

    Each signature gets its own query; queries are sent in one batch per
    build id. The handler stores the install count and at most `threshold`
    proto entries for the build into `signatures[sgn]`.
    """
    started = 'Get proto-signatures (big) for {}-{}: started.'
    logger.info(started.format(product, channel))

    def handler(bid, threshold, json, data):
        facets = json['facets']
        if not facets['proto_signature']:
            return
        installs = facets['cardinality_install_time']['value']
        if installs == 0:
            # Zero installs is stored as one.
            installs = 1
        data['installs'][bid] = installs
        for proto_facet in facets['proto_signature']:
            protos = data['protos'][bid]
            if len(protos) < threshold:
                protos.append({
                    'proto': proto_facet['term'],
                    'count': proto_facet['count'],
                    'uuid': proto_facet['facets']['uuid'][0]['term']
                })

    threshold = config.get_threshold('protos', product, channel)
    base_params = {
        'product': product,
        'release_channel': utils.get_search_channel(channel),
        'date': search_date,
        'build_id': '',
        'signature': '',
        '_aggs.proto_signature': 'uuid',
        '_results_number': 0,
        '_facets': '_cardinality.install_time',
        '_facets_size': threshold
    }

    sgns_by_bids = utils.get_sgns_by_bids(signatures)
    for bid, all_signatures in sgns_by_bids.items():
        per_bid = copy.deepcopy(base_params)
        per_bid['build_id'] = utils.get_buildid(bid)
        callback = functools.partial(handler, bid, threshold)
        batch = []
        for sgn in all_signatures:
            per_sgn = copy.deepcopy(per_bid)
            per_sgn['signature'] = '=' + sgn
            batch.append(
                Query(socorro.SuperSearch.URL,
                      params=per_sgn,
                      handler=callback,
                      handlerdata=signatures[sgn]))

        socorro.SuperSearch(queries=batch).wait()

    finished = 'Get proto-signatures (big) for {}-{}: finished.'
    logger.info(finished.format(product, channel))
Example #19
0
def get_sgns_data_helper(data,
                         platforms,
                         signatures,
                         bids_chan,
                         extra,
                         search_date,
                         product,
                         channel=None):
    """Get the data from Socorro and collect the stats.

    One query is built per signature, each handled by
    ``filter_signatures_data`` with ``data`` as its handler data.

    Returns:
        The ``socorro.SuperSearch`` object created for the queries; this
        function does not wait on it.
    """
    limit = 80
    build_ids = [utils.get_buildid(bid) for bid in bids_chan.keys()]
    aggregations = [
        'install_time',
        '_cardinality.install_time',
        'startup_crash',
        'platform_pretty_version',
    ]
    base_params = {
        'build_id': build_ids,
        'date': search_date,
        'product': product,
        '_aggs.build_id': aggregations,
        '_results_number': 0,
        '_facets': 'signature',
        '_facets_size': limit,
    }

    if channel:
        if channel == 'beta':
            # A beta search also covers aurora.
            base_params['release_channel'] = ['beta', 'aurora']
        else:
            base_params['release_channel'] = channel

    utils.update_params(base_params, extra)

    queries = []
    for signature in signatures:
        on_response = functools.partial(filter_signatures_data, limit,
                                        product, platforms, signature,
                                        bids_chan)
        queries.append(
            Query(socorro.SuperSearch.URL,
                  params=dict(base_params, signature='=' + signature),
                  handler=on_response,
                  handlerdata=data))

    return socorro.SuperSearch(queries=queries)
Example #20
0
def get_uuids_fennec(signatures, search_date, channel):
    """Get the uuids for Fennec java crashes.

    For every signature/build pair whose proto list is still empty, one
    entry with an empty proto, the crash count and a uuid is added to
    ``signatures``. Nothing is returned.
    """
    logger.info("Get uuids for Fennec-{}: started.".format(channel))

    def handler(json, data):
        if json["errors"] or not json["facets"]["signature"]:
            return
        # Each query targets one build id, read back from the first facet.
        bid = utils.get_build_date(json["facets"]["build_id"][0]["term"])
        for entry in json["facets"]["signature"]:
            sgn = entry["term"]
            count = entry["count"]
            uuid = entry["facets"]["uuid"][0]["term"]
            protos = data[sgn]["protos"][bid]
            if protos:
                continue
            protos.append({"proto": "", "count": count, "uuid": uuid})

    base_params = {
        "product": "Fennec",
        "release_channel": utils.get_search_channel(channel),
        "date": search_date,
        "build_id": "",
        "signature": "",
        "_aggs.signature": "uuid",
        "_results_number": 0,
        "_facets": "build_id",
        "_facets_size": 100,
    }

    queries = []
    for bid, all_signatures in utils.get_sgns_by_bids(signatures).items():
        bid_params = copy.deepcopy(base_params)
        bid_params["build_id"] = utils.get_buildid(bid)

        # Signatures are queried ten at a time.
        for chunk in Connection.chunks(all_signatures, 10):
            chunk_params = copy.deepcopy(bid_params)
            chunk_params["signature"] = ["=" + s for s in chunk]
            queries.append(
                Query(
                    socorro.SuperSearch.URL,
                    params=chunk_params,
                    handler=handler,
                    handlerdata=signatures,
                ))

    # All builds are sent in a single batch.
    socorro.SuperSearch(queries=queries).wait()

    logger.info("Get uuids for Fennec-{}: finished.".format(channel))
Example #21
0
def get_uuids_fennec(signatures, search_date, channel):
    """Get the uuids for FennecAndroid java crashes.

    The handler adds, for each signature of a response, a single entry
    (empty proto, crash count, uuid) to the proto list of the response's
    build, unless that list already has content. Nothing is returned.
    """
    logger.info('Get uuids for FennecAndroid-{}: started.'.format(channel))

    def handler(json, data):
        if json['errors']:
            return
        sgn_facets = json['facets']['signature']
        if not sgn_facets:
            return
        first_build = json['facets']['build_id'][0]
        bid = utils.get_build_date(first_build['term'])
        for sgn_facet in sgn_facets:
            sgn = sgn_facet['term']
            count = sgn_facet['count']
            uuid = sgn_facet['facets']['uuid'][0]['term']
            protos = data[sgn]['protos'][bid]
            if not protos:
                protos.append({'proto': '', 'count': count, 'uuid': uuid})

    base_params = {
        'product': 'FennecAndroid',
        'release_channel': utils.get_search_channel(channel),
        'date': search_date,
        'build_id': '',
        'signature': '',
        '_aggs.signature': 'uuid',
        '_results_number': 0,
        '_facets': 'build_id',
        '_facets_size': 100
    }

    sgns_by_bids = utils.get_sgns_by_bids(signatures)
    batch = []
    for bid, all_signatures in sgns_by_bids.items():
        per_bid = copy.deepcopy(base_params)
        per_bid['build_id'] = utils.get_buildid(bid)

        for sgns in Connection.chunks(all_signatures, 10):
            per_chunk = copy.deepcopy(per_bid)
            per_chunk['signature'] = ['=' + s for s in sgns]
            batch.append(
                Query(socorro.SuperSearch.URL,
                      params=per_chunk,
                      handler=handler,
                      handlerdata=signatures))

    search = socorro.SuperSearch(queries=batch)
    search.wait()

    logger.info('Get uuids for FennecAndroid-{}: finished.'.format(channel))
Example #22
0
    def test_signature(self):
        """Fetch two '~OOM' signatures, then get their nightly trend."""
        def keep_first_two(json, data):
            sgn_facets = json['facets']['signature']
            data.extend([sgn_facets[0]['term'], sgn_facets[1]['term']])

        found = []
        search_params = {
            'product': 'Firefox',
            'signature': '~OOM',
            '_results_number': 0,
            '_facets': ['signature']
        }
        socorro.SuperSearch(params=search_params,
                            handler=keep_first_two,
                            handlerdata=found).wait()

        self.assertEqual(len(found), 2)
        self.assertIsNotNone(
            socorro.SignatureTrend.get_trend(found, channel='nightly'))
Example #23
0
def filter_buildids_helper(fa_bids, fx_bids, channel):
    """Filter the builds to keep only the relevant ones.

    The search built by ``get_filter_query`` feeds ``filter_by_crashes_num``
    (bound to ``channel``) with one set per product; the search session is
    closed once the search has completed. Each list of builds is then
    passed through ``get_useful_bids`` with its product's set.

    Returns:
        The filtered ``(fa_bids, fx_bids)`` pair.
    """
    per_product = {'Firefox': set(), 'FennecAndroid': set()}
    on_response = functools.partial(filter_by_crashes_num, channel)
    search = socorro.SuperSearch(
        params=get_filter_query(fa_bids, fx_bids, channel),
        handler=on_response,
        handlerdata=per_product,
    )
    search.wait()
    search.session.close()

    useful_fa = get_useful_bids(fa_bids, per_product['FennecAndroid'])
    useful_fx = get_useful_bids(fx_bids, per_product['Firefox'])

    return useful_fa, useful_fx
Example #24
0
def get_changeset(buildid, channel, product):
    """Trick to get changeset for a particular buildid/channel/product.

    Crashes for the build are aggregated by ``topmost_filenames``. Every term
    ending in ``:<hex revision>`` adds its crash count to that revision, and
    the revision with the highest total is selected.

    Returns:
        The selected revision passed through ``utils.short_rev``, or ``None``
        when no revision was collected.
    """
    # The date filter is derived from the build id as received, before the
    # build id itself is converted.
    search_date = '>=' + lmdutils.get_date_str(buildid)
    buildid = utils.get_buildid(buildid)
    logger.info('Get changeset for {}-{}-{}.'.format(buildid, product,
                                                     channel))

    revision_re = re.compile(r'^.*:([0-9a-f]+)$')

    def handler(json, data):
        buckets = json['facets']['build_id']
        if not buckets:
            return
        for bucket in buckets:
            for entry in bucket['facets']['topmost_filenames']:
                found = revision_re.match(entry['term'])
                if not found:
                    continue
                revision = found.group(1)
                hits = entry['count']
                data[revision] += hits

    params = {
        'product': product,
        'release_channel': channel,
        'build_id': buildid,
        'date': search_date,
        'topmost_filenames': '@"hg:hg.mozilla.org/".*:[0-9a-f]+',
        '_aggs.build_id': 'topmost_filenames',
        '_results_number': 0,
        '_facets': 'product',
        '_facets_size': 100
    }

    totals = defaultdict(int)
    search = socorro.SuperSearch(params=params,
                                 handler=handler,
                                 handlerdata=totals)
    search.wait()

    if totals:
        # First revision reaching the highest total, in insertion order.
        winner = max(totals, key=totals.get)
        chgset = utils.short_rev(winner)
    else:
        chgset = None

    logger.info('Get changeset: finished.')

    return chgset
Example #25
0
def get_changeset(buildid, channel, product):
    """Trick to get changeset for a particular buildid/channel/product.

    The ``topmost_filenames`` aggregation of the build's crashes is scanned
    for terms of the form ``...:<hex revision>``. Crash counts are summed per
    revision and the revision with the largest sum is returned, shortened
    with ``utils.short_rev``. ``None`` is returned when nothing matched.
    """
    # Compute the date filter first: it needs the build id as passed in.
    search_date = ">=" + lmdutils.get_date_str(buildid)
    buildid = utils.get_buildid(buildid)
    logger.info("Get changeset for {}-{}-{}.".format(buildid, product,
                                                     channel))

    pat = re.compile(r"^.*:([0-9a-f]+)$")

    def handler(json, data):
        per_build = json["facets"]["build_id"]
        if not per_build:
            return
        for build_facet in per_build:
            filenames = build_facet["facets"]["topmost_filenames"]
            for filename_facet in filenames:
                matched = pat.match(filename_facet["term"])
                if matched:
                    rev = matched.group(1)
                    weight = filename_facet["count"]
                    data[rev] += weight

    params = {
        "product": product,
        "release_channel": channel,
        "build_id": buildid,
        "date": search_date,
        "topmost_filenames": '@"hg:hg.mozilla.org/".*:[0-9a-f]+',
        "_aggs.build_id": "topmost_filenames",
        "_results_number": 0,
        "_facets": "product",
        "_facets_size": 100,
    }

    counts_by_rev = defaultdict(int)
    socorro.SuperSearch(
        params=params, handler=handler, handlerdata=counts_by_rev
    ).wait()

    chgset = None
    if counts_by_rev:
        # max() keeps the first revision seen among equal totals.
        top_rev = max(counts_by_rev, key=counts_by_rev.get)
        chgset = utils.short_rev(top_rev)

    logger.info("Get changeset: finished.")

    return chgset
Example #26
0
def get_by_install_time(channels, product='Firefox', date='today', query=None):
    """Get, per channel and per day, the number of distinct install times.

    The search window runs from 25 weeks before *date* up to the day after
    *date*. One SuperSearch query is issued per channel, using a date
    histogram with an ``install_time`` cardinality sub-aggregation.

    Args:
        channels: iterable of release channels to query.
        product (Optional[str]): the product, 'Firefox' by default.
        date (Optional[str]): the reference date, 'today' by default.
        query (Optional[dict]): extra search parameters merged over the
            defaults; ``None`` (the default) adds nothing. The
            'release_channel' entry is always overwritten per channel.

    Returns:
        dict: ``{channel: {date: ninstalls}}`` where each date is the value
        returned by ``utils.get_date_ymd`` for a histogram bucket.
    """
    today = utils.get_date_ymd(date)
    tomorrow = today + relativedelta(days=1)
    six_months_ago = today - relativedelta(weeks=25)
    search_date = socorro.SuperSearch.get_search_date(six_months_ago, tomorrow)
    data = {chan: {} for chan in channels}

    def handler(json, data):
        if not json['facets']['histogram_date']:
            return

        for facets in json['facets']['histogram_date']:
            # Named `day` so the outer `date` parameter is not shadowed.
            day = utils.get_date_ymd(facets['term'])
            ninstalls = facets['facets']['cardinality_install_time']['value']
            data[day] = ninstalls

    base_params = {
        'product': product,
        'date': search_date,
        'release_channel': '',
        '_histogram.date': '_cardinality.install_time',
        '_results_number': 10
    }
    # `query` defaults to None rather than a shared mutable dict.
    if query is not None:
        base_params.update(query)

    searches = []
    for chan in channels:
        # Each search gets its own copy so the queries never share state.
        chan_params = copy.deepcopy(base_params)
        chan_params['release_channel'] = chan
        searches.append(
            socorro.SuperSearch(params=chan_params,
                                handler=handler,
                                handlerdata=data[chan]))

    for s in searches:
        s.wait()

    return data
Example #27
0
def get_buildids(search_date, channels, products):
    """Get the most recent build ids, with their version, per product/channel.

    One query per (product, channel) aggregates crashes by build id and
    version. A build is kept when its crash count reaches the minimum from
    ``config.get_min_total``; on beta, versions ending in 'a2' or 'b0' are
    dropped. After ``remove_dup_versions``, the builds are sorted and only the
    last ``config.get_versions(prod, chan)`` of them are kept.

    Returns:
        dict: ``{product: {channel: [(build_date, version), ...]}}``.
    """
    data = {p: {c: [] for c in channels} for p in products}

    def handler(chan, threshold, json, data):
        buckets = json['facets']['build_id']
        if not buckets:
            return
        for bucket in buckets:
            count = bucket['count']
            if count < threshold:
                continue
            version = bucket['facets']['version'][0]['term']
            if chan == 'beta' and version.endswith(('a2', 'b0')):
                continue
            buildid = bucket['term']
            data.append((buildid, version, count))

    base_params = {
        'product': '',
        'release_channel': '',
        'date': search_date,
        '_aggs.build_id': 'version',
        '_results_number': 0,
        '_facets': 'release_channel',
        '_facets_size': 1000
    }

    queries = []
    for prod in products:
        for chan in channels:
            query_params = copy.deepcopy(base_params)
            query_params['product'] = prod
            # Firefox beta also covers the aurora channel.
            if prod == 'Firefox' and chan == 'beta':
                query_params['release_channel'] = ['beta', 'aurora']
            else:
                query_params['release_channel'] = chan
            min_total = config.get_min_total(prod, chan)
            queries.append(
                Query(socorro.SuperSearch.URL,
                      params=query_params,
                      handler=functools.partial(handler, chan, min_total),
                      handlerdata=data[prod][chan]))

    socorro.SuperSearch(queries=queries).wait()

    for prod in list(data):
        data[prod] = remove_dup_versions(data[prod])

    res = {}
    for prod, info in data.items():
        per_channel = {}
        res[prod] = per_channel
        for chan, bids in info.items():
            ordered = sorted(bids)
            keep = config.get_versions(prod, chan)
            if len(ordered) > keep:
                ordered = ordered[-keep:]
            per_channel[chan] = [(utils.get_build_date(bid), v)
                                 for bid, v in ordered]

    logger.info('Buildids for {}/{} got.'.format(products, channels))

    return res
Example #28
0
def get_sgns_data(channels, versions, signatures, products, date='today'):
    """Collect, per signature and per build, crash and install counts.

    One SuperSearch query is issued per signature, aggregated by build id.
    Builds whose facets report more than one product or more than one channel
    are recorded in ``doubloons`` and handed to ``get_sgns_for_doubloons``
    after the main queries.

    Args:
        channels: channels to initialise in the result structure.
        versions: passed to ``get_all_buildids`` to obtain the build ids.
        signatures: crash signatures to query (one query each).
        products: products to initialise in the result structure.
        date: reference date; the search starts ``config.get_limit()`` days
            before it.

    Returns:
        Nested defaultdict ``res[product][channel][signature]`` holding only
        the entries that are no longer the initial placeholder list.
    """
    today = lmdutils.get_date_ymd(date)
    few_days_ago = today - relativedelta(days=config.get_limit())
    search_date = socorro.SuperSearch.get_search_date(few_days_ago)
    # Template for per-build counters; copies are indexed with RAW and
    # INSTALLS in the handler below.
    nbase = [0, 0]
    data = {}
    bids, all_bids, all_versions, doubloons = get_all_buildids(versions)

    # Every signature starts out pointing at the same bids[product][chan]
    # object. The handler treats a list value as "not filled in yet".
    for product in products:
        data[product] = d1 = {}
        b1 = bids[product]
        for chan in channels:
            d1[chan] = d2 = {}
            b2 = b1[chan]
            for signature in signatures:
                d2[signature] = b2

    # Used both as '_facets_size' and as the threshold at which the
    # install_time facet length is replaced by the cardinality value.
    limit = 80

    def handler(sgn, json, data):
        if not json['facets']['build_id']:
            return
        for facets in json['facets']['build_id']:
            bid = facets['term']
            prod, chan, ver = all_bids[str(bid)]
            _facets = facets['facets']
            # Channels seen for this build, with 'aurora' folded into 'beta'.
            chans = set()
            for c in _facets['release_channel']:
                c = c['term']
                if c == 'aurora':
                    chans.add('beta')
                else:
                    chans.add(c)

            if len(_facets['product']) != 1 or len(chans) != 1:
                # Build id shared by several products or channels: set it
                # aside for get_sgns_for_doubloons.
                bid = str(bid)
                doubloons[bid] = [(prod, chan, ver)]
            else:
                dpc = data[prod][chan]
                nums = dpc[sgn]
                bid = utils.get_build_date(bid)
                if isinstance(nums, list):
                    # First hit for this signature: replace the placeholder
                    # list by a dict with one fresh counter per entry.
                    dpc[sgn] = nums = {b: copy.copy(nbase) for b in dpc[sgn]}
                if bid in nums:
                    n = nums[bid]
                    n[RAW] = facets['count']
                    N = len(_facets['install_time'])
                    if N == limit:
                        # The facet list reached the requested facet size, so
                        # use the cardinality aggregation instead.
                        N = _facets['cardinality_install_time']['value']
                    n[INSTALLS] = N

    base_params = {
        'build_id':
        list(all_bids.keys()),
        'signature':
        '',
        'version':
        all_versions,
        'date':
        search_date,
        '_aggs.build_id': [
            'install_time', '_cardinality.install_time', 'release_channel',
            'product'
        ],
        '_results_number':
        0,
        '_facets':
        'signature',
        '_facets_size':
        limit
    }

    queries = []

    # One exact-match ('=' prefix) query per signature; the signature is bound
    # into the handler with functools.partial.
    for signature in signatures:
        params = copy.deepcopy(base_params)
        params['signature'] = '=' + signature
        hdler = functools.partial(handler, signature)
        queries.append(
            Query(socorro.SuperSearch.URL,
                  params=params,
                  handler=hdler,
                  handlerdata=data))
    socorro.SuperSearch(queries=queries).wait()

    get_sgns_for_doubloons(doubloons, signatures, search_date, data)

    # Keep only the signatures whose placeholder list has been replaced.
    res = defaultdict(lambda: defaultdict(lambda: dict()))
    for p, i in data.items():
        for c, j in i.items():
            for sgn, numbers in j.items():
                if not isinstance(numbers, list):
                    res[p][c][sgn] = numbers

    return res
Example #29
0
def get_sgns_by_buildid(signatures, channels, products, search_date, bids):
    """Get crash/install numbers per signature and build, plus global ratios.

    For each product and channel, one query per build id aggregates crashes
    by signature. Each signature gets a deep copy of the base list from
    ``utils.get_base_list`` whose entry at the build's index receives the raw
    crash count and the install count. Ratios are computed on all signatures
    before the data is restricted to the requested *signatures*.

    Returns:
        tuple: ``(res, ratios)``, both shaped ``{product: {channel: ...}}``.
    """
    base = utils.get_base_list(bids)
    limit = config.get_limit_facets()

    logger.info('Get crash numbers for {}-{}: started.'.format(
        products, channels))

    def handler(base, index, json, data):
        buckets = json['facets']['signature']
        if not buckets:
            return
        for bucket in buckets:
            sgn = bucket['term']
            if sgn not in data:
                data[sgn] = copy.deepcopy(base)
            data[sgn][index][RAW] = bucket['count']
            sub_facets = bucket['facets']
            installs = len(sub_facets['install_time'])
            if installs == limit:
                # Facet list is full: use the cardinality aggregation.
                installs = sub_facets['cardinality_install_time']['value']
            data[sgn][index][INSTALLS] = installs

    base_params = {
        'product': '',
        'release_channel': '',
        'build_id': '',
        'date': search_date,
        '_aggs.signature': ['install_time', '_cardinality.install_time'],
        '_results_number': 0,
        '_facets': 'release_channel',
        '_facets_size': limit
    }

    ratios = {}
    res = {}

    for prod in products:
        base_prod = base[prod]
        bids_prod = bids[prod]
        ratios_prod = {}
        res_prod = {}
        ratios[prod] = ratios_prod
        res[prod] = res_prod
        for chan in channels:
            data = {}
            build_ids = [b for b, _ in bids_prod[chan]]
            queries = []
            for index, bid in enumerate(build_ids):
                query_params = copy.deepcopy(base_params)
                query_params['product'] = prod
                query_params['release_channel'] = chan
                query_params['build_id'] = utils.get_buildid(bid)
                queries.append(
                    Query(socorro.SuperSearch.URL,
                          params=query_params,
                          handler=functools.partial(handler, base_prod[chan],
                                                    index),
                          handlerdata=data))
            socorro.SuperSearch(queries=queries).wait()
            ratios_prod[chan] = tools.get_global_ratios(data)

            # ratios are computed, so unrequested signatures can be dropped
            res_prod[chan] = {
                sgn: numbers
                for sgn, numbers in data.items() if sgn in signatures
            }

    logger.info('Get crash numbers for {}-{}: finished.'.format(
        products, channels))
    return res, ratios
Example #30
0
def get(channel,
        date,
        product='Firefox',
        duration=11,
        tc_limit=50,
        crash_type='all',
        startup=False):
    """Get crashes info

    Loads the top signatures from Socorro, their daily trend over the period,
    and the Bugzilla bugs whose crash-signature field mentions them (bugs
    resolved as DUPLICATE are replaced by the bugs they were duped to).
    Progress messages are written to stdout.

    Args:
        channel (str): the channel (lower-cased before use)
        date (str): the final date
        product (Optional[str]): the product
        duration (Optional[int]): the number of days to retrieve, ending at `date`
        tc_limit (Optional[int]): the number of topcrashes to load
        crash_type (Optional[str]): 'all' (default) or 'browser' or 'content' or 'plugin'.
            NOTE(review): the value is wrapped in a list below but is not used
            by any query in this function.
        startup (Optional[bool]): when true, 'startup_crash' is set to True in the
            top-signatures query

    Returns:
        dict: 'start_date', 'end_date', 'versions' and 'signatures'; the latter maps
        each signature to its 'tc_rank', 'crash_count', 'estimated_user_count',
        'startup_percent' and 'bugs'
    """
    channel = channel.lower()
    # `v` is defined outside this function; presumably it maps a channel name
    # to its version number -- TODO confirm.
    version = v[channel]
    sys.stdout.write('Getting version information from Socorro...')
    sys.stdout.flush()
    versions = versions_util.get_channel_versions(channel, product)
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    if crash_type and isinstance(crash_type, six.string_types):
        crash_type = [crash_type]

    _date = utils.get_date_ymd(date)
    start_date = utils.get_date_str(_date - timedelta(duration - 1))
    end_date = utils.get_date_str(_date)

    signatures = {}

    def signature_handler(json):
        # Per-signature stats layout:
        # [count, win_count, mac_count, linux_count, startup_count, install_time cardinality]
        for signature in json['facets']['signature']:
            signatures[signature['term']] = [signature['count'], 0, 0, 0, 0, 0]

            for platform in signature['facets']['platform']:
                if platform['term'] == 'Linux':
                    signatures[signature['term']][3] = platform['count']
                elif platform['term'] == 'Windows NT':
                    signatures[signature['term']][1] = platform['count']
                elif platform['term'] == 'Mac OS X':
                    signatures[signature['term']][2] = platform['count']

            # Both '1' and 'T' terms are counted as startup crashes.
            for startup_crash in signature['facets']['startup_crash']:
                if startup_crash['term'] in ['1', 'T']:
                    signatures[signature['term']][4] += startup_crash['count']

            signatures[signature['term']][5] = signature['facets'][
                'cardinality_install_time']['value']

    params = {
        'product':
        product,
        'version':
        versions,
        'date':
        socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel':
        channel,
        '_aggs.signature':
        ['platform', '_cardinality.install_time', 'startup_crash'],
        '_results_number':
        0,
        '_facets_size':
        tc_limit,
    }

    if startup:
        params['startup_crash'] = True

    sys.stdout.write('Getting top signatures from Socorro...')
    sys.stdout.flush()
    socorro.SuperSearch(params=params, handler=signature_handler).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # Bugzilla fields to fetch: the tracking flag for this channel's version
    # and one status flag per version from it up to nightly's.
    bug_flags = [
        'resolution', 'id', 'last_change_time',
        'cf_tracking_firefox' + str(version)
    ]
    for i in range(int(version), int(v['nightly']) + 1):
        bug_flags.append('cf_status_firefox' + str(i))

    # TODO: too many requests... should be improved with chunks
    bugs = {}
    # TODO: Use regexp, when the Bugzilla bug that prevents them from working will be fixed.
    base = {
        'j_top': 'OR',
        'o1': 'substring',
        'f1': 'cf_crash_signature',
        'v1': None,
        'o2': 'substring',
        'f2': 'cf_crash_signature',
        'v2': None,
        'o3': 'substring',
        'f3': 'cf_crash_signature',
        'v3': None,
        'o4': 'substring',
        'f4': 'cf_crash_signature',
        'v4': None,
        'include_fields': bug_flags
    }

    # One Bugzilla query per signature, matching the four spacing variants of
    # '[@ signature ]'.
    queries = []
    for sgn in signatures.keys():
        cparams = base.copy()
        cparams['v1'] = '[@' + sgn + ']'
        cparams['v2'] = '[@ ' + sgn + ' ]'
        cparams['v3'] = '[@ ' + sgn + ']'
        cparams['v4'] = '[@' + sgn + ' ]'
        bugs[sgn] = []
        queries.append(
            Query(Bugzilla.API_URL, cparams, __bug_handler, bugs[sgn]))
    # Not waited on here: the results are polled further down, after the
    # trend queries.
    res_bugs = Bugzilla(queries=queries)

    # we have stats by signature in `signatures`
    # for each signature get the number of crashes on the last X days
    # so get the signature trend
    trends = {}
    default_trend = {}
    for i in range(duration):
        default_trend[_date - timedelta(i)] = 0

    base = {
        'product': product,
        'version': versions,
        'signature': None,
        'date': socorro.SuperSearch.get_search_date(start_date, end_date),
        'release_channel': channel,
        '_results_number': 0,
        '_histogram.date': ['signature'],
        '_histogram_interval': 1
    }

    # Signatures are queried in chunks of 10; a chunk is split further once
    # the accumulated length of its signatures reaches 1000 characters.
    queries = []
    for sgns in Connection.chunks(
            list(map(lambda sgn: '=' + sgn, signatures.keys())), 10):
        sgn_group = []
        for sgn in sgns:
            if sum(len(s) for s in sgn_group) >= 1000:
                cparams = base.copy()
                cparams['signature'] = sgn_group
                queries.append(
                    Query(socorro.SuperSearch.URL, cparams,
                          functools.partial(__trend_handler, default_trend),
                          trends))
                sgn_group = []

            sgn_group.append(sgn)

        if len(sgn_group) > 0:
            cparams = base.copy()
            cparams['signature'] = sgn_group
            queries.append(
                Query(socorro.SuperSearch.URL, cparams,
                      functools.partial(__trend_handler, default_trend),
                      trends))

    sys.stdout.write('Getting trends for top signatures from Socorro...')
    sys.stdout.flush()
    socorro.SuperSearch(queries=queries).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # From here on signatures[sgn] is a pair (stats, trend), the trend being
    # listed from the most recent key to the oldest.
    # NOTE(review): a signature missing from `trends` keeps its plain stats
    # list, which the x[1][0][0] sort key below cannot index -- confirm that
    # __trend_handler always fills every signature.
    for sgn, trend in trends.items():
        signatures[sgn] = (signatures[sgn], [
            trend[key] for key in sorted(trend.keys(), reverse=True)
        ])

    _signatures = {}
    # order `signatures` by crash count
    sorted_signatures = sorted(signatures.items(),
                               key=lambda x: x[1][0][0],
                               reverse=True)
    i = 1
    for s in sorted_signatures:
        _signatures[s[0]] = i  # top crash rank
        i += 1

    sys.stdout.write(
        'Getting bugs linked to the top signatures from Bugzilla...')
    sys.stdout.flush()
    # Poll the Bugzilla results started earlier; each timeout prints a dot and
    # restarts the scan from the first result.
    while True:
        try:
            for r in res_bugs.results:
                r.result(timeout=2)
            break
        except TimeoutError:
            sys.stdout.write('.')
            sys.stdout.flush()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    # TODO: In the first query to get the bugs, also get dupe_of and avoid the first query
    #       in follow_dup (so modify follow_dup to accept both a bug ID or a bug object).
    queries = []
    for sgn in signatures.keys():
        duplicate_ids = [
            bug['id'] for bug in bugs[sgn] if bug['resolution'] == 'DUPLICATE'
        ]

        # Remove bugs resolved as DUPLICATE from the list of bugs associated to the signature.
        bugs[sgn] = [
            bug for bug in bugs[sgn] if bug['id'] not in duplicate_ids
        ]

        # Find duplicates for bugs resolved as DUPLICATE.
        duplicates = {
            k: v
            for k, v in Bugzilla.follow_dup(duplicate_ids).items()
            if v is not None
        }
        # Keep only targets that are not already listed for this signature.
        duplicate_targets = [
            bug_id for bug_id in duplicates.values()
            if int(bug_id) not in [bug['id'] for bug in bugs[sgn]]
        ]
        if len(duplicate_targets) == 0:
            continue

        # Get info about bugs that the DUPLICATE bugs have been duped to.
        params = {
            'id': ','.join(duplicate_targets),
            'include_fields': bug_flags,
        }
        queries.append(
            Query(Bugzilla.API_URL, params, __bug_handler, bugs[sgn]))
    sys.stdout.write(
        'Resolving duplicate bugs to the bugs they\'ve been duplicated to...')
    sys.stdout.flush()
    Bugzilla(queries=queries).wait()
    sys.stdout.write(' ✔\n')
    sys.stdout.flush()

    for sgn, stats in signatures.items():
        # stats is a 2-uple:
        # ([count, win_count, mac_count, linux_count, startup_count, install_time cardinality], trend)
        startup_percent = float(stats[0][4]) / float(stats[0][0])
        _signatures[sgn] = {
            'tc_rank': _signatures[sgn],
            'crash_count': stats[0][0],
            'estimated_user_count': stats[0][5],
            'startup_percent': startup_percent,
            'bugs': bugs[sgn]
        }

    return {
        'start_date': start_date,
        'end_date': end_date,
        'versions': list(versions),
        'signatures': _signatures,
    }