def test_load_whitelist(s3_fixture):
    """After transform(), load() must publish both the filtered whitelist
    file and the featured-addon file under the AMO dump prefix in S3.

    Uses the mocked S3 from `s3_fixture`; `data` is the sample AMO dump.
    """
    conn, data = s3_fixture

    etl = taar_amowhitelist.AMOTransformer(taar_amowhitelist.AMO_DUMP_BUCKET,
                                           taar_amowhitelist.AMO_DUMP_PREFIX,
                                           taar_amowhitelist.AMO_DUMP_FILENAME,
                                           taar_amowhitelist.MIN_RATING,
                                           taar_amowhitelist.MIN_AGE)
    etl.transform(data)

    etl.load()

    s3 = boto3.resource('s3', region_name='us-west-2')
    bucket_obj = s3.Bucket(taar_amowhitelist.AMO_DUMP_BUCKET)

    # Build the key list once — the original recomputed the identical
    # comprehension before each assertion.
    keys = [
        obj.key
        for obj in bucket_obj.objects.filter(
            Prefix=taar_amowhitelist.AMO_DUMP_PREFIX)
    ]

    # Both output files must exist at their prefixed S3 locations.
    for filename in (taar_amowhitelist.FILTERED_AMO_FILENAME,
                     taar_amowhitelist.FEATURED_FILENAME):
        full_s3_name = '{}{}'.format(taar_amowhitelist.AMO_DUMP_PREFIX,
                                     filename)
        assert full_s3_name in keys
def test_transform_whitelist(s3_fixture):
    '''
    The transform for the AMOTransformer is just filtering by
    age using `first_create_date` and using the ratings.average
    with a minimum of 3.0
    '''

    conn, data = s3_fixture
    etl = taar_amowhitelist.AMOTransformer(
        taar_amowhitelist.AMO_DUMP_BUCKET,
        taar_amowhitelist.AMO_DUMP_PREFIX,
        taar_amowhitelist.AMO_DUMP_FILENAME,
        taar_amowhitelist.MIN_RATING,
        taar_amowhitelist.MIN_AGE,
    )
    etl.transform(data)

    whitelist = etl.get_whitelist()
    assert len(whitelist) == 1

    now = datetime.datetime.today().replace(tzinfo=None)
    min_age = datetime.timedelta(days=taar_amowhitelist.MIN_AGE)

    for record in whitelist.values():
        first_file = record['current_version']['files'][0]

        # Only webextensions survive the whitelist filter.
        assert first_file['is_webextension']
        assert record['ratings']['average'] >= taar_amowhitelist.MIN_RATING

        # The addon must be at least MIN_AGE days old.
        created = parse(record['first_create_date']).replace(tzinfo=None)
        assert created + min_age < now

        assert 'is_featured' in record
        # Platform metadata must survive the transform.
        assert first_file['platform'] in VALID_PLATFORMS
# Example 3
def test_transform_featuredlist(s3_fixture):
    """
    The transform for the AMOTransformer is just filtering by
    age using `first_create_date` and using the ratings.average
    with a minimum of 3.0
    """

    conn, data = s3_fixture
    etl = taar_amowhitelist.AMOTransformer(taar_amowhitelist.AMO_DUMP_BUCKET,
                                           taar_amowhitelist.AMO_DUMP_PREFIX,
                                           taar_amowhitelist.AMO_DUMP_FILENAME,
                                           taar_amowhitelist.MIN_RATING,
                                           taar_amowhitelist.MIN_AGE)
    etl.transform(data)

    featured = etl.get_featuredlist()

    # SAMPLE_DATA contains 4 records; exactly one is marked not featured,
    # so three must come back from the featured list.
    assert len(featured) == 3

    # Every surviving record carries the is_featured flag.
    assert all(rec["is_featured"] for rec in featured.values())
def test_extract(s3_fixture):
    '''
    extract() should read the AMO dump from the fixture S3 bucket and
    return it unmodified — the result must match SAMPLE_DATA verbatim.
    '''
    # NOTE: the previous docstring was copy-pasted from the transform
    # tests and wrongly described age/rating filtering; extract() does
    # no filtering.
    etl = taar_amowhitelist.AMOTransformer(taar_amowhitelist.AMO_DUMP_BUCKET,
                                           taar_amowhitelist.AMO_DUMP_PREFIX,
                                           taar_amowhitelist.AMO_DUMP_FILENAME,
                                           taar_amowhitelist.MIN_RATING,
                                           taar_amowhitelist.MIN_AGE)
    jdata = etl.extract()
    assert jdata == SAMPLE_DATA
def test_load(s3_fixture):
    '''
    load() with explicit data must write the filtered whitelist file
    to the AMO dump prefix in the mocked S3 bucket.
    '''
    conn, data = s3_fixture

    etl = taar_amowhitelist.AMOTransformer(
        taar_amowhitelist.AMO_DUMP_BUCKET,
        taar_amowhitelist.AMO_DUMP_PREFIX,
        taar_amowhitelist.AMO_DUMP_FILENAME,
        taar_amowhitelist.MIN_RATING,
        taar_amowhitelist.MIN_AGE,
    )
    etl.load(EXPECTED_FINAL_JDATA)

    bucket = boto3.resource('s3', region_name='us-west-2').Bucket(
        taar_amowhitelist.AMO_DUMP_BUCKET)

    keys = [
        obj.key
        for obj in bucket.objects.filter(
            Prefix=taar_amowhitelist.AMO_DUMP_PREFIX)
    ]

    # The filtered whitelist must appear at its prefixed key.
    expected_key = '{}{}'.format(taar_amowhitelist.AMO_DUMP_PREFIX,
                                 taar_amowhitelist.FILTERED_AMO_FILENAME)
    assert expected_key in keys
def test_transform(s3_fixture):
    '''
    The transform for the AMOTransformer is just filtering by
    age using `first_create_date` and using the ratings.average
    with a minimum of 3.0
    '''

    conn, data = s3_fixture
    etl = taar_amowhitelist.AMOTransformer(
        taar_amowhitelist.AMO_DUMP_BUCKET,
        taar_amowhitelist.AMO_DUMP_PREFIX,
        taar_amowhitelist.AMO_DUMP_FILENAME,
        taar_amowhitelist.MIN_RATING,
        taar_amowhitelist.MIN_AGE,
    )
    transformed = etl.transform(data)
    assert len(transformed) == 2

    # Hoist the loop invariants: "now" and the minimum-age delta.
    now = datetime.datetime.today().replace(tzinfo=None)
    min_age = datetime.timedelta(days=taar_amowhitelist.MIN_AGE)

    for record in transformed.values():
        assert record['ratings']['average'] >= taar_amowhitelist.MIN_RATING
        created = parse(record['first_create_date']).replace(tzinfo=None)
        assert created + min_age < now