Example No. 1
def test_get_pings_none_filter(test_store, mock_message_parser, spark_context):
    upload_ping(test_store, 'value1', app='Firefox')
    upload_ping(test_store, 'value2', app='Thunderbird')
    pings = get_pings(spark_context, app=None)

    assert sorted(pings.collect()) == ['value1', 'value2']

    pings = get_pings(spark_context, app='*')

    assert sorted(pings.collect()) == ['value1', 'value2']
Example No. 2
def test_get_pings_multiple_by_range(test_store, mock_message_parser, spark_context):
    upload_ping(test_store, 'value1', **{f[0]: f[1] for f in test_data_for_range_match})
    upload_ping(test_store, 'value2', **{f[0]: f[2] for f in test_data_for_range_match})
    pings = get_pings(spark_context, **{f[0]: f[1] for f in test_data_for_range_match})

    assert pings.collect() == ['value1']

    pings = get_pings(spark_context, **{f[0]: (f[3], f[4]) for f in test_data_for_range_match})

    assert pings.collect() == ['value1']
Example No. 3
def test_get_pings_none_filter(test_store, mock_message_parser, spark_context):
    upload_ping(test_store, 'value1', app='Firefox')
    upload_ping(test_store, 'value2', app='Thunderbird')
    pings = get_pings(spark_context, app=None)

    assert sorted(pings.collect()) == ['value1', 'value2']

    pings = get_pings(spark_context, app='*')

    assert sorted(pings.collect()) == ['value1', 'value2']
Example No. 4
def test_get_pings_fraction(test_store, mock_message_parser, spark_context):
    for i in range(1, 10 + 1):
        upload_ping(test_store, 'value', build_id=str(i))

    pings = get_pings(spark_context)

    assert pings.count() == 10

    pings = get_pings(spark_context, fraction=0.1)

    assert pings.count() == 1
Example No. 5
def test_get_pings_by_range(test_store, mock_message_parser, spark_context,
                            filter_name, exact, wrong, start, end):
    upload_ping(test_store, 'value1', **{filter_name: exact})
    upload_ping(test_store, 'value2', **{filter_name: wrong})
    pings = get_pings(spark_context, **{filter_name: exact})

    assert pings.collect() == ['value1']

    pings = get_pings(spark_context, **{filter_name: (start, end)})

    assert pings.collect() == ['value1']
Example No. 6
def test_get_pings_by_range(test_store, mock_message_parser, spark_context,
                            filter_name, exact, wrong, start, end):
    upload_ping(test_store, 'value1', **{filter_name: exact})
    upload_ping(test_store, 'value2', **{filter_name: wrong})
    pings = get_pings(spark_context, **{filter_name: exact})

    assert pings.collect() == ['value1']

    pings = get_pings(spark_context, **{filter_name: (start, end)})

    assert pings.collect() == ['value1']
Example No. 7
def test_get_pings_fraction(test_store, mock_message_parser, spark_context):
    for i in range(1, 10+1):
        upload_ping(test_store, 'value', build_id=str(i))

    pings = get_pings(spark_context)

    assert pings.count() == 10

    pings = get_pings(spark_context, fraction=0.1)

    assert pings.count() == 1
Example No. 8
def retrieve_crash_data(sc, submission_date_range, comparable_dimensions,
                        fraction):
    # get the raw data
    normal_pings = get_pings(sc,
                             doc_type="main",
                             submission_date=submission_date_range,
                             fraction=fraction)
    crash_pings = get_pings(sc,
                            doc_type="crash",
                            submission_date=submission_date_range,
                            fraction=fraction)

    return normal_pings.union(crash_pings)
Example No. 9
def retrieve_crash_data(sc, submission_date_range, comparable_dimensions, fraction):
    # get the raw data
    normal_pings = get_pings(
        sc, doc_type="main",
        submission_date=submission_date_range,
        fraction=fraction
    )
    crash_pings = get_pings(
        sc, doc_type="crash",
        submission_date=submission_date_range,
        fraction=fraction
    )

    return normal_pings.union(crash_pings)
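The two variants above differ only in formatting. For orientation, here is a minimal driver sketch; the SparkContext setup and the date values are assumptions rather than part of the original examples, and note that comparable_dimensions is accepted but never used in the function body:

# Hypothetical driver for retrieve_crash_data; values below are assumptions.
from pyspark import SparkContext

sc = SparkContext(appName="crash-analysis")  # assumed application name

# A (start, end) tuple, the same range-filter form used in the tests above.
date_range = ("20160101", "20160107")  # assumed dates

pings = retrieve_crash_data(sc, date_range,
                            comparable_dimensions=None,  # unused by the body above
                            fraction=0.1)
print(pings.count())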
Example No. 10
def test_get_pings_multiple_filters(test_store, mock_message_parser, spark_context):
    filters = dict(submission_date='20160101', channel='beta')
    upload_ping(test_store, 'value1', **filters)
    filters['app'] = 'Thunderbird'
    upload_ping(test_store, 'value2', **filters)
    pings = get_pings(spark_context, **filters)

    assert pings.collect() == ['value2']
Example No. 11
def test_get_pings_by_exact_match(test_store, dummy_pool_executor,
                                  mock_message_parser, spark_context,
                                  filter_name, exact, wrong):
    upload_ping(test_store, 'value1', **{filter_name: exact})
    upload_ping(test_store, 'value2', **{filter_name: wrong})
    pings = get_pings(spark_context, **{filter_name: exact})

    assert pings.collect() == ['value1']
Example No. 12
def test_get_pings_by_exact_match(test_store, dummy_pool_executor,
                                  mock_message_parser, spark_context,
                                  filter_name, exact, wrong):
    upload_ping(test_store, 'value1', **{filter_name: exact})
    upload_ping(test_store, 'value2', **{filter_name: wrong})
    pings = get_pings(spark_context, **{filter_name: exact})

    assert pings.collect() == ['value1']
Example No. 13
def test_get_pings_multiple_filters(test_store, mock_message_parser, spark_context):
    filters = dict(submission_date='20160101', channel='beta')
    upload_ping(test_store, 'value1', **filters)
    filters['app'] = 'Thunderbird'
    upload_ping(test_store, 'value2', **filters)
    pings = get_pings(spark_context, **filters)

    assert pings.collect() == ['value2']
Example No. 14
def test_get_pings_multiple_by_range(test_store, dummy_pool_executor,
                                     mock_message_parser, spark_context):
    upload_ping(test_store, 'value1',
                **{f[0]: f[1]
                   for f in test_data_for_range_match})
    upload_ping(test_store, 'value2',
                **{f[0]: f[2]
                   for f in test_data_for_range_match})
    pings = get_pings(spark_context,
                      **{f[0]: f[1]
                         for f in test_data_for_range_match})

    assert pings.collect() == ['value1']

    pings = get_pings(
        spark_context,
        **{f[0]: (f[3], f[4])
           for f in test_data_for_range_match})

    assert pings.collect() == ['value1']
Example No. 15
def aggregate_metrics(sc, channels, submission_date, fraction=1):
    """ Returns the build-id and submission date aggregates for a given submission date.

    :param sc: A SparkContext instance
    :param channels: Either the name of a channel or a list/tuple of names
    :param submission_date: The submission date for which the data will be aggregated
    :param fraction: An approximate fraction of submissions to consider for aggregation
    """
    if not isinstance(channels, (tuple, list)):
        channels = [channels]

    channels = set(channels)
    rdds = [get_pings(sc, channel=ch, submission_date=submission_date,
                      doc_type="saved_session", schema="v4", fraction=fraction)
            for ch in channels]
    pings = reduce(lambda x, y: x.union(y), rdds)
    return _aggregate_metrics(pings)
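A hedged usage sketch for aggregate_metrics: the channel names and date are illustrative assumptions, and on Python 3 the reduce() call above needs the functools import shown here.

from functools import reduce  # reduce() is a builtin only on Python 2

aggregates = aggregate_metrics(sc,
                               channels=("nightly", "beta"),  # assumed channel names
                               submission_date="20160101",    # assumed date
                               fraction=0.25)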
Example No. 16
def aggregate_metrics(sc, channels, submission_date, fraction=1):
    """ Returns the build-id and submission date aggregates for a given submission date.

    :param sc: A SparkContext instance
    :param channels: Either the name of a channel or a list/tuple of names
    :param submission_date: The submission date for which the data will be aggregated
    :param fraction: An approximate fraction of submissions to consider for aggregation
    """
    if not isinstance(channels, (tuple, list)):
        channels = [channels]

    channels = set(channels)
    rdds = [get_pings(sc, channel=ch, submission_date=submission_date,
                      doc_type="saved_session", schema="v4", fraction=fraction)
            for ch in channels]
    pings = reduce(lambda x, y: x.union(y), rdds)
    return _aggregate_metrics(pings)
Example No. 17
def test_get_pings_properties_keyedHistogram_with_processes(
        test_store, dummy_pool_executor, mock_message_parser, spark_context):

    measures = {
        "payload": {
            "processes": {
                "content": {
                    "keyedHistograms": {
                        "TEST": {
                            "key1": {
                                "values": {
                                    "0": 2
                                }
                            },
                        }
                    }
                }
            },
            "keyedHistograms": {
                "TEST": {
                    "key1": {
                        "values": {
                            "0": 1
                        }
                    },
                },
            }
        }
    }

    field = 'payload/keyedHistograms/TEST'

    upload_ping(test_store, json.dumps(measures))

    pings = get_pings(spark_context)
    filtered_pings = get_pings_properties(
        pings, [field],
        with_processes=True,
        additional_histograms=additional_histograms)

    res = (filtered_pings.map(lambda d: d.get(field)).filter(
        lambda p: p is not None and len(p.keys()) > 0))
    assert res.count() == 1

    hist = res.first()
    assert hist['key1_parent'] == 1
    assert hist['key1_children'] == 2
    assert hist['key1'] == 3
Example No. 18
def test_get_pings_wrong_schema(test_store, dummy_pool_executor,
                                mock_message_parser, spark_context):
    with pytest.raises(ValueError):
        get_pings(spark_context, schema=1)
Example No. 19
def test_get_pings_properties_keyedHistogram_exists_with_process(
        test_store, dummy_pool_executor, mock_message_parser, spark_context):

    # Before Firefox 51, histograms could be found in the child
    # payloads. Reading them from there keeps the behavior
    # consistent across versions.
    child_measures = {
        "payload": {
            "childPayloads": [
                {
                    "keyedHistograms": {}
                },  # empty keyedHistogram
                {},  # missing keyedHistogram
            ],
            "keyedHistograms": {
                "TEST": {
                    "key1": {
                        "values": {
                            "0": 1
                        }
                    },
                },
            }
        }
    }

    # The histograms for all child processes are aggregated in the
    # content process. Here, keyedHistograms do not exist in
    # the content process.
    content_measures = {
        "payload": {
            "processes": {
                "content": {
                    "keyedHistograms": {}
                }
            },
            "keyedHistograms": {
                "TEST": {
                    "key1": {
                        "values": {
                            "0": 1
                        }
                    },
                },
            }
        }
    }

    field = 'payload/keyedHistograms/TEST'

    upload_ping(test_store, json.dumps(child_measures))
    upload_ping(test_store, json.dumps(content_measures))

    pings = get_pings(spark_context)
    filtered_pings = get_pings_properties(
        pings, [field], additional_histograms=additional_histograms)

    res = (filtered_pings.map(lambda d: d.get(field)).filter(
        lambda p: p is not None and len(p.keys()) > 0))

    # assert existence
    assert res.count() == 2
Example No. 20
def test_get_pings_wrong_schema(test_store, dummy_pool_executor,
                                mock_message_parser, spark_context):
    with pytest.raises(ValueError):
        get_pings(spark_context, schema=1)
Example No. 21
def test_get_pings_wrong_schema(test_store, mock_message_parser, spark_context):
    with pytest.raises(ValueError):
        get_pings(spark_context, schema=1)
Example No. 22
def test_get_pings_wrong_schema(test_store, mock_message_parser,
                                spark_context):
    with pytest.raises(ValueError):
        get_pings(spark_context, schema=1)
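Taken together, these examples exercise three filter forms accepted by get_pings (an exact value, a (start, end) tuple for range matching, and None or '*' as wildcards) plus fraction-based sampling. A combined sketch follows; the specific filter values are illustrative assumptions, not taken from any test above:

pings = get_pings(spark_context,
                  app='Firefox',                             # exact match
                  submission_date=('20160101', '20160107'),  # (start, end) range
                  channel='*',                               # wildcard; None behaves the same
                  fraction=0.1)                              # sample ~10% of submissions
print(sorted(pings.collect()))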