コード例 #1
0
def createFacts(events, users):
    try:
        events_uid = etl.cutout(events, 'tracking_id', 'utm_medium', 'utm_campaign')
        events_tui = etl.cutout(events, 'user_id')

        stage_uid = etl.join(users, events_uid, key='user_id')
        stage_tui = etl.join(users, events_tui, key='tracking_id')

        stage_utm = etl.cut(stage_tui, 'user_id', 'utm_medium', 'utm_campaign')
        stage_uid_utm = etl.join(stage_uid, stage_utm, key='user_id')
        stage_m_s = etl.mergesort(stage_uid_utm, stage_tui, key=['created_at', 'email'])

        mappings = OrderedDict()
        mappings['tid'] = 'tracking_id'
        mappings['uid'] = 'user_id'
        mappings['utm_medium'] = 'utm_medium'
        mappings['utm_campaign'] = 'utm_campaign', {'audio': 'none', 'social': 'none'}
        mappings['utm_campaigntype'] = 'utm_campaign'
        mappings['email'] = 'email'
        mappings['subscription'] = 'type'
        mappings['sub_order'] = 'type', {'Signup Completed': '1', 'Trial Started': '2', 'Subscription Started': '3', 'Subscription Ended': '4'}
        mappings['created_at'] = 'created_at'

        # Mapping
        stage_mapping = etl.fieldmap(stage_m_s, mappings)

        # Sort
        stage_mapping_ordered = etl.sort(stage_mapping, key=['created_at', 'email', 'sub_order'])

        # Datetime split
        t1 = etl.split(stage_mapping_ordered, 'created_at', 'T', ['date', 'time'], include_original=True)
        t2 = etl.split(t1, 'date', '-', ['year', 'month', 'day'])
        stage_ready = etl.split(t2, 'time', ':', ['hour', 'minute', 'second'])

        # Export as csv to load folder
        etl.tocsv(stage_ready, 'load/facts.csv')

    except Exception as e:
        print("Something went wrong. Error {0}".format(e))
コード例 #2
0
ファイル: examples.py プロジェクト: datamade/petl
# using the include_original argument
table3 = capture(table1, 'variable', '(\\w)(\\d+)', ['treat', 'time'], include_original=True)
look(table3)


# split

table1 = [['id', 'variable', 'value'],
          ['1', 'parad1', '12'],
          ['2', 'parad2', '15'],
          ['3', 'tempd1', '18'],
          ['4', 'tempd2', '19']]

from petl import split, look
look(table1)
table2 = split(table1, 'variable', 'd', ['variable', 'day'])
look(table2)


# select

table1 = [['foo', 'bar', 'baz'],
          ['a', 4, 9.3],
          ['a', 2, 88.2],
          ['b', 1, 23.3],
          ['c', 8, 42.0],
          ['d', 7, 100.9],
          ['c', 2]]

from petl import select, look     
look(table1)
コード例 #3
0
ファイル: examples.py プロジェクト: aklimchak/petl
# using the include_original argument
table3 = capture(table1, 'variable', '(\\w)(\\d+)', ['treat', 'time'], include_original=True)
look(table3)


# split

table1 = [['id', 'variable', 'value'],
          ['1', 'parad1', '12'],
          ['2', 'parad2', '15'],
          ['3', 'tempd1', '18'],
          ['4', 'tempd2', '19']]

from petl import split, look
look(table1)
table2 = split(table1, 'variable', 'd', ['variable', 'day'])
look(table2)


# select

table1 = [['foo', 'bar', 'baz'],
          ['a', 4, 9.3],
          ['a', 2, 88.2],
          ['b', 1, 23.3],
          ['c', 8, 42.0],
          ['d', 7, 100.9],
          ['c', 2]]

from petl import select, look     
look(table1)
コード例 #4
0
mappings['utm_medium'] = 'utm_medium'
mappings['utm_campaign'] = 'utm_campaign', {'audio': 'none', 'social': 'none'}
mappings['utm_campaign_type'] = 'utm_campaign'
mappings['email'] = 'email'
mappings['subscription'] = 'type'
mappings['sub_order'] = 'type', {
    'Signup Completed': '1',
    'Trial Started': '2',
    'Subscription Started': '3',
    'Subscription Ended': '4'
}
mappings['created_at'] = 'created_at'

# Mapping
stage_mapping = etl.fieldmap(stage_m_s, mappings)

# Sort
stage_mapping_ordered = etl.sort(stage_mapping,
                                 key=['created_at', 'email', 'sub_order'])

# Datetime split
t1 = etl.split(stage_mapping_ordered,
               'created_at',
               'T', ['date', 'time'],
               include_original=True)
t2 = etl.split(t1, 'date', '-', ['year', 'month', 'day'])
stage_ready = etl.split(t2, 'time', ':', ['hour', 'minute', 'second'])

# Export as csv to load folder
etl.tocsv(stage_ready, 'load/facts.csv')