def test_keyset_parts_preprocess(self):
    # A ``parts_preprocess`` list given to a Keyset must be readable back,
    # unchanged, from ``rule.params.keysets[<name>]['parts_preprocess']``.

    def add_bar(parts, params):
        # Generator-style preprocessor: tag the record, then emit it.
        parts['bar'] = 1
        yield parts

    rule = InfernoRule(
        keysets={
            'keyset1': Keyset(parts_preprocess=[add_bar]),
        },
    )

    preprocessors = rule.params.keysets['keyset1']['parts_preprocess']
    eq_(preprocessors, [add_bar])

    # The stored callable still behaves like the function that was passed in.
    emitted = list(preprocessors[0]({'hello': 'world'}, None))
    eq_(emitted, [{'bar': 1, 'hello': 'world'}])
'es-mx', 'et', 'eu', 'fa', 'ff', 'fi', 'fr', 'fy-nl', 'ga-ie', 'gd', 'gl', 'gu-in', 'he', 'hi-in', 'hr', 'hu', 'hsb', 'hy-am', 'id', 'is', 'it', 'ja', 'ja-jp-mac', 'ka', 'kk', 'km', 'kn', 'ko', 'ku', 'lij', 'lt', 'lv', 'mai', 'mk', 'ml', 'mr', 'ms', 'my', 'nb-no', 'nl', 'nn-no', 'oc', 'or', 'pa-in', 'pl', 'pt-br', 'pt-pt', 'rm', 'ro', 'ru', 'si', 'sk', 'sl', 'son', 'sq', 'sr', 'sv-se', 'sw', 'ta', 'te', 'th', 'tr', 'uk', 'ur', 'vi', 'xh', 'zh-cn', 'zh-tw', 'zu' }, combiner_function=combiner, keysets={ 'impression_stats': Keyset( key_parts=['date', 'locale', 'tile_id', 'country_code'], value_parts=[ 'impressions', 'clicks', 'pinned', 'blocked', 'sponsored', 'sponsored_link' ], ), }, ), InfernoRule( name='application_stats', source_tags=['incoming:app'], day_range=1, map_input_stream=chunk_json_stream, map_init_function=impression_stats_init, parts_preprocess=[parse_date, parse_ip, parse_ua, count], geoip_file=GEOIP, partitions=32, sort_buffer_size='25%',
def test_keysets(self):
    # Disabled case, kept for reference (no key sets):
    #     rule = InfernoRule()
    #     eq_(rule.params.keysets, {})

    # Case 1 -- one key set.  Keyset options passed straight to the rule are
    # expected under the '_default' entry, with '_keyset' prepended to the
    # key parts and empty pre/post-process lists filled in.
    single_rule = InfernoRule(
        key_parts=['id'],
        value_parts=['count'],
        table='some_table',
        column_mappings={'id': 'some_id'},
    )
    expected_single = {
        '_default': {
            'key_parts': ['_keyset', 'id'],
            'value_parts': ['count'],
            'table': 'some_table',
            'column_mappings': {'id': 'some_id'},
            'parts_preprocess': [],
            'parts_postprocess': [],
        },
    }
    eq_(single_rule.params.keysets, expected_single)

    # Case 2 -- many key sets.  Each named Keyset is expected under its own
    # name, expanded the same way as the default one above.
    multi_rule = InfernoRule(
        keysets={
            'keyset1': Keyset(
                key_parts=['id1'],
                value_parts=['count1'],
                column_mappings={'id1': 'some_id1'},
                table='some_table1',
            ),
            'keyset2': Keyset(
                key_parts=['id2'],
                value_parts=['count2'],
                column_mappings={'id2': 'some_id2'},
                table='some_table2',
            ),
        },
    )
    expected_multi = {
        'keyset1': {
            'key_parts': ['_keyset', 'id1'],
            'value_parts': ['count1'],
            'table': 'some_table1',
            'column_mappings': {'id1': 'some_id1'},
            'parts_preprocess': [],
            'parts_postprocess': [],
        },
        'keyset2': {
            'key_parts': ['_keyset', 'id2'],
            'value_parts': ['count2'],
            'table': 'some_table2',
            'column_mappings': {'id2': 'some_id2'},
            'parts_preprocess': [],
            'parts_postprocess': [],
        },
    }
    eq_(multi_rule.params.keysets, expected_multi)
from inferno.lib.rule import InfernoRule, Keyset

# Module-level flag; this rules module sets it to True.
AUTORUN = True

# A single rule, 'automatic_rule_4', that declares two independent keysets,
# each with one key part and one value part.
RULES = [
    InfernoRule(
        name='automatic_rule_4',
        keysets={
            'keyset_1': Keyset(
                key_parts=['key_1'],
                value_parts=['value_1'],
            ),
            'keyset_2': Keyset(
                key_parts=['key_2'],
                value_parts=['value_2'],
            ),
        },
    ),
]
from inferno.lib.rule import InfernoRule, Keyset

# A single rule, 'manual_rule_4', that declares two independent keysets,
# each with one key part and one value part.
RULES = [
    InfernoRule(
        name='manual_rule_4',
        keysets={
            'keyset_1': Keyset(
                key_parts=['key_1'],
                value_parts=['value_1'],
            ),
            'keyset_2': Keyset(
                key_parts=['key_2'],
                value_parts=['value_2'],
            ),
        },
    ),
]
geoip_file=GEOIP, partitions=32, sort_buffer_size='25%', locale_whitelist=LOCALE_WHITELIST, result_processor=partial(insert_redshift, host=RS_HOST, port=RS_PORT, database=RS_DB, user=RS_USER, password=RS_PASSWORD, bucket_name=RS_BUCKET), combiner_function=combiner, keysets={ 'impression_stats': Keyset( key_parts=['date', 'position', 'locale', 'tile_id', 'country_code', 'os', 'browser', 'version', 'device', 'year', 'month', 'week', 'enhanced', 'blacklisted'], value_parts=['impressions', 'clicks', 'pinned', 'blocked', 'sponsored', 'sponsored_link'], table='impression_stats_daily'), 'site_stats': Keyset( key_parts=['date', 'locale', 'country_code', 'os', 'browser', 'version', 'device', 'year', 'month', 'week', 'url'], value_parts=['impressions', 'clicks', 'pinned', 'blocked', 'sponsored', 'sponsored_link'], table='site_stats_daily', ), 'newtab_stats': Keyset( key_parts=['date', 'locale', 'country_code', 'os', 'browser', 'version', 'device', 'year', 'month', 'week'], value_parts=['newtabs'], table='newtab_stats_daily') } ),
# an example keyset parts_preprocess that works only for a specific keyset def count_again(parts, params): parts['count'] = parts['count'] + 1 yield parts RULES = [ InfernoRule(name='last_names_json', source_tags=['example:chunk:users'], map_input_stream=chunk_json_stream, parts_preprocess=[count], partitions=2, keysets={ 'last_name_keyset': Keyset(key_parts=['last'], value_parts=['count'], parts_preprocess=[count_again]) }), InfernoRule( name='last_names_csv', source_tags=['example:chunk:users'], map_input_stream=chunk_csv_stream, csv_fields=('first', 'last'), csv_dialect='excel', parts_preprocess=[count], partitions=2, key_parts=['last'], value_parts=['count'], ), InfernoRule(name='last_names_result', source_tags=['example:chunk:users'],
# NOTE: import order is kept exactly as it was -- the star import comes last
# and may rebind names imported above it.
from inferno.lib.rule import Keyset
from infernyx.rules import combiner
from config_infernyx import *

# Module-level flag; this rules module sets it to False.
AUTO_RUN = False


def count(parts, params):
    # parts_preprocess hook: stamp the record with a 'count' of 1 and emit it.
    # 'count' is the value part of the 'stats' keyset declared below.
    # ``params`` is accepted to match the hook signature but is not used.
    parts['count'] = 1
    yield parts


# One rule, 'count_fetches', with a single 'stats' keyset keyed on
# date / ver / locale / action.
RULES = [
    InfernoRule(
        name='count_fetches',
        source_tags=['incoming:app'],
        day_range=1,
        map_input_stream=chunk_json_stream,
        parts_preprocess=[count],
        geoip_file=GEOIP,
        combiner_function=combiner,
        keysets={
            'stats': Keyset(
                key_parts=['date', 'ver', 'locale', 'action'],
                value_parts=['count'],
            ),
        },
    ),
]
result_processor=partial(insert_redshift, host=RS_HOST, port=5432, database=RS_DB, user=RS_USER, password=RS_PASSWORD, bucket_name=RS_BUCKET), combiner_function=combiner, keysets={ 'impression_stats': Keyset( key_parts=[ 'date', 'position', 'locale', 'tile_id', 'country_code', 'os', 'browser', 'version', 'device', 'year', 'month', 'week', 'enhanced' ], value_parts=[ 'impressions', 'clicks', 'pinned', 'blocked', 'sponsored', 'sponsored_link' ], table='impression_stats_daily', ), 'site_stats': Keyset( key_parts=[ 'date', 'locale', 'country_code', 'os', 'browser', 'version', 'device', 'year', 'month', 'week', 'url' ], value_parts=[ 'impressions', 'clicks', 'pinned', 'blocked', 'sponsored', 'sponsored_link' ],
'contbr_occupation', 'contb_receipt_amt', 'contb_receipt_dt', 'receipt_desc', 'memo_cd', 'memo_text', 'form_tp', 'file_num', ), csv_dialect='excel', keysets={ 'by_candidate': Keyset( key_parts=['cand_nm'], value_parts=['count', 'contb_receipt_amt'], column_mappings={ 'cand_nm': 'candidate', 'contb_receipt_amt': 'amount', }, ), 'by_occupation': Keyset( key_parts=['contbr_occupation', 'cand_nm'], value_parts=['count', 'contb_receipt_amt'], column_mappings={ 'count': 'count_occupation_candidate', 'cand_nm': 'candidate', 'contb_receipt_amt': 'amount', 'contbr_occupation': 'occupation', }, ) })