Example #1
0
    def test_keyset_parts_preprocess(self):
        """A keyset's ``parts_preprocess`` list is exposed on the rule params.

        The stored callable must be the very function handed to ``Keyset``
        and must still behave as a generator over the parts dict.
        """

        def foo(parts, params):
            # Tag the record and hand the same dict back.
            parts['bar'] = 1
            yield parts

        keyset = Keyset(parts_preprocess=[foo])
        rule = InfernoRule(keysets={'keyset1': keyset})

        stored = rule.params.keysets['keyset1']['parts_preprocess']
        eq_(stored, [foo])

        # Run the stored preprocessor once and collect everything it yields.
        preprocess = stored[0]
        emitted = list(preprocess({'hello': 'world'}, None))
        eq_(emitted, [{'bar': 1, 'hello': 'world'}])
Example #2
0
         'es-mx', 'et', 'eu', 'fa', 'ff', 'fi', 'fr', 'fy-nl', 'ga-ie',
         'gd', 'gl', 'gu-in', 'he', 'hi-in', 'hr', 'hu', 'hsb', 'hy-am',
         'id', 'is', 'it', 'ja', 'ja-jp-mac', 'ka', 'kk', 'km', 'kn', 'ko',
         'ku', 'lij', 'lt', 'lv', 'mai', 'mk', 'ml', 'mr', 'ms', 'my',
         'nb-no', 'nl', 'nn-no', 'oc', 'or', 'pa-in', 'pl', 'pt-br',
         'pt-pt', 'rm', 'ro', 'ru', 'si', 'sk', 'sl', 'son', 'sq', 'sr',
         'sv-se', 'sw', 'ta', 'te', 'th', 'tr', 'uk', 'ur', 'vi', 'xh',
         'zh-cn', 'zh-tw', 'zu'
     },
     combiner_function=combiner,
     keysets={
         'impression_stats':
         Keyset(
             key_parts=['date', 'locale', 'tile_id', 'country_code'],
             value_parts=[
                 'impressions', 'clicks', 'pinned', 'blocked', 'sponsored',
                 'sponsored_link'
             ],
         ),
     },
 ),
 InfernoRule(
     name='application_stats',
     source_tags=['incoming:app'],
     day_range=1,
     map_input_stream=chunk_json_stream,
     map_init_function=impression_stats_init,
     parts_preprocess=[parse_date, parse_ip, parse_ua, count],
     geoip_file=GEOIP,
     partitions=32,
     sort_buffer_size='25%',
Example #3
0
    def test_keysets(self):
        """Check the ``keysets`` dict built on ``rule.params``.

        Covers a rule configured with rule-level key/value arguments and a
        rule configured with an explicit ``keysets`` mapping.
        """
        #        # no key sets
        #        rule = InfernoRule()
        #        eq_(rule.params.keysets, {})

        # one key set: rule-level arguments are expected under '_default'
        single_rule = InfernoRule(
            key_parts=['id'],
            value_parts=['count'],
            table='some_table',
            column_mappings={'id': 'some_id'},
        )
        expected_single = {
            '_default': {
                'key_parts': ['_keyset', 'id'],
                'value_parts': ['count'],
                'table': 'some_table',
                'column_mappings': {'id': 'some_id'},
                'parts_preprocess': [],
                'parts_postprocess': [],
            },
        }
        eq_(single_rule.params.keysets, expected_single)

        # many key sets: each Keyset is expected under its own name
        first = Keyset(
            key_parts=['id1'],
            value_parts=['count1'],
            column_mappings={'id1': 'some_id1'},
            table='some_table1',
        )
        second = Keyset(
            key_parts=['id2'],
            value_parts=['count2'],
            column_mappings={'id2': 'some_id2'},
            table='some_table2',
        )
        multi_rule = InfernoRule(keysets={'keyset1': first, 'keyset2': second})

        expected_multi = {
            'keyset1': {
                'key_parts': ['_keyset', 'id1'],
                'value_parts': ['count1'],
                'table': 'some_table1',
                'column_mappings': {'id1': 'some_id1'},
                'parts_preprocess': [],
                'parts_postprocess': [],
            },
            'keyset2': {
                'key_parts': ['_keyset', 'id2'],
                'value_parts': ['count2'],
                'table': 'some_table2',
                'column_mappings': {'id2': 'some_id2'},
                'parts_preprocess': [],
                'parts_postprocess': [],
            },
        }
        eq_(multi_rule.params.keysets, expected_multi)
Example #4
0
from inferno.lib.rule import InfernoRule
from inferno.lib.rule import Keyset

# Module-level flag, enabled for this rules module.
# NOTE(review): presumably tells Inferno to run these rules automatically - confirm.
AUTORUN = True

# A single rule carrying two keysets, each with one key part and one value part.
RULES = [
    InfernoRule(
        name='automatic_rule_4',
        keysets={
            'keyset_1': Keyset(key_parts=['key_1'], value_parts=['value_1']),
            'keyset_2': Keyset(key_parts=['key_2'], value_parts=['value_2']),
        },
    ),
]
Example #5
0
from inferno.lib.rule import InfernoRule
from inferno.lib.rule import Keyset


# A single rule carrying two keysets, each with one key part and one value part.
RULES = [
    InfernoRule(
        name='manual_rule_4',
        keysets={
            'keyset_1': Keyset(key_parts=['key_1'], value_parts=['value_1']),
            'keyset_2': Keyset(key_parts=['key_2'], value_parts=['value_2']),
        },
    ),
]
     geoip_file=GEOIP,
     partitions=32,
     sort_buffer_size='25%',
     locale_whitelist=LOCALE_WHITELIST,
     result_processor=partial(insert_redshift,
                              host=RS_HOST,
                              port=RS_PORT,
                              database=RS_DB,
                              user=RS_USER,
                              password=RS_PASSWORD,
                              bucket_name=RS_BUCKET),
     combiner_function=combiner,
     keysets={
         'impression_stats': Keyset(
             key_parts=['date', 'position', 'locale', 'tile_id', 'country_code', 'os', 'browser',
                        'version', 'device', 'year', 'month', 'week', 'enhanced', 'blacklisted'],
             value_parts=['impressions', 'clicks', 'pinned', 'blocked', 'sponsored', 'sponsored_link'],
             table='impression_stats_daily'),
         'site_stats': Keyset(
             key_parts=['date', 'locale', 'country_code', 'os', 'browser', 'version', 'device', 'year',
                        'month', 'week', 'url'],
             value_parts=['impressions', 'clicks', 'pinned', 'blocked', 'sponsored', 'sponsored_link'],
             table='site_stats_daily',
         ),
         'newtab_stats': Keyset(
             key_parts=['date', 'locale', 'country_code', 'os', 'browser', 'version', 'device', 'year',
                        'month', 'week'],
             value_parts=['newtabs'],
             table='newtab_stats_daily')
     }
 ),
Example #7
0
# an example keyset parts_preprocess that works only for a specific keyset
def count_again(parts, params):
    """Increase ``parts['count']`` by one and yield the same ``parts`` object.

    ``params`` is accepted to match the preprocess call signature but is
    not read here.
    """
    bumped = parts['count'] + 1
    parts['count'] = bumped
    yield parts


RULES = [
    InfernoRule(name='last_names_json',
                source_tags=['example:chunk:users'],
                map_input_stream=chunk_json_stream,
                parts_preprocess=[count],
                partitions=2,
                keysets={
                    'last_name_keyset':
                    Keyset(key_parts=['last'],
                           value_parts=['count'],
                           parts_preprocess=[count_again])
                }),
    InfernoRule(
        name='last_names_csv',
        source_tags=['example:chunk:users'],
        map_input_stream=chunk_csv_stream,
        csv_fields=('first', 'last'),
        csv_dialect='excel',
        parts_preprocess=[count],
        partitions=2,
        key_parts=['last'],
        value_parts=['count'],
    ),
    InfernoRule(name='last_names_result',
                source_tags=['example:chunk:users'],
Example #8
0
from inferno.lib.rule import Keyset
from infernyx.rules import combiner
from config_infernyx import *

# Module-level flag, disabled for this rules module.
# NOTE(review): presumably controls automatic scheduling of RULES; the flag is
# spelled AUTORUN (no underscore) in other Inferno rule modules - confirm which
# name the loader actually reads.
AUTO_RUN = False


def count(parts, params):
    """Set ``parts['count']`` to 1 and yield the same ``parts`` object.

    ``params`` is accepted to match the preprocess call signature but is
    not read here.
    """
    unit = 1
    parts['count'] = unit
    yield parts


# One rule: tag every record with count=1 (via ``count``) and aggregate it
# under the 'stats' keyset.
RULES = [
    InfernoRule(
        name='count_fetches',
        source_tags=['incoming:app'],
        day_range=1,
        map_input_stream=chunk_json_stream,
        parts_preprocess=[count],
        geoip_file=GEOIP,
        combiner_function=combiner,
        keysets={
            'stats': Keyset(
                key_parts=['date', 'ver', 'locale', 'action'],
                value_parts=['count'],
            ),
        },
    ),
]
Example #9
0
 result_processor=partial(insert_redshift,
                          host=RS_HOST,
                          port=5432,
                          database=RS_DB,
                          user=RS_USER,
                          password=RS_PASSWORD,
                          bucket_name=RS_BUCKET),
 combiner_function=combiner,
 keysets={
     'impression_stats':
     Keyset(
         key_parts=[
             'date', 'position', 'locale', 'tile_id', 'country_code',
             'os', 'browser', 'version', 'device', 'year', 'month',
             'week', 'enhanced'
         ],
         value_parts=[
             'impressions', 'clicks', 'pinned', 'blocked', 'sponsored',
             'sponsored_link'
         ],
         table='impression_stats_daily',
     ),
     'site_stats':
     Keyset(
         key_parts=[
             'date', 'locale', 'country_code', 'os', 'browser',
             'version', 'device', 'year', 'month', 'week', 'url'
         ],
         value_parts=[
             'impressions', 'clicks', 'pinned', 'blocked', 'sponsored',
             'sponsored_link'
         ],
Example #10
0
     'contbr_occupation',
     'contb_receipt_amt',
     'contb_receipt_dt',
     'receipt_desc',
     'memo_cd',
     'memo_text',
     'form_tp',
     'file_num',
 ),
 csv_dialect='excel',
 keysets={
     'by_candidate':
     Keyset(
         key_parts=['cand_nm'],
         value_parts=['count', 'contb_receipt_amt'],
         column_mappings={
             'cand_nm': 'candidate',
             'contb_receipt_amt': 'amount',
         },
     ),
     'by_occupation':
     Keyset(
         key_parts=['contbr_occupation', 'cand_nm'],
         value_parts=['count', 'contb_receipt_amt'],
         column_mappings={
             'count': 'count_occupation_candidate',
             'cand_nm': 'candidate',
             'contb_receipt_amt': 'amount',
             'contbr_occupation': 'occupation',
         },
     )
 })