Ejemplos de fieldmap en Python, ejemplos de petl.fieldmap en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: run.py Proyecto: SergeVL/petlsql

def table_execute(view, **kwargs):
    r = iter(view)  #@
    if 'row_number' in kwargs:
        r = etl.addrownumbers(r, field=kwargs['row_number'])
    if 'fieldmap' in kwargs:
        r = etl.fieldmap(r, kwargs['fieldmap'])
    return r

Ejemplo n.º 2

0

Mostrar archivo

def transform_characters_data(characters_page, url_to_name_map):
    transformations = OrderedDict()
    for field in settings.STAR_WARS_CHARACTERS_BASE_FIELDS:
        transformations[field] = field
    transformations['date'] = 'edited', lambda v: v[:10]  # the width of the
    # date is always the same
    transformations['homeworld'] = 'homeworld', lambda v: url_to_name_map[v]
    return etl.fieldmap(characters_page, transformations)

Ejemplo n.º 3

0

Mostrar archivo

Archivo: demo1.py Proyecto: sshd123/study-python3

def test_sync_users(src_conn, dest_conn):
    src_table = etl.fromdb(src_conn, 'select * from auth_user limit 8')

    mapping = OrderedDict()
    mapping['org_id'] = lambda x: 1
    mapping['username'] = '******'
    mapping['name'] = 'full_name'
    mapping['status'] = lambda x: 'ACTIVE'
    mapping['uid'] = 'username'
    mapping['type'] = lambda x: "STUDENT"
    dst_table = etl.fieldmap(src_table, mapping)
    upsert_many(dest_conn['auth_user'], dst_table, keys=['username'])

Ejemplo n.º 4

0

Mostrar archivo

def dataPreProcessing(fileName):
    inputData = fromcsv(fileName)
    table1 = cutout(inputData, 'member_id', 'grade', 'sub_grade', 'emp_title',
                    'url', 'desc', 'title', 'accept_d', 'exp_d', 'list_d',
                    'issue_d', 'purpose', 'addr_city', 'addr_state',
                    'earliest_cr_line', 'last_pymnt_d', 'next_pymnt_d',
                    'last_credit_pull_d')
    table2 = select(
        table1,
        lambda i: i['term'] == ' 36 months' and i['loan_status'] is not "")
    labelMapping = OrderedDict()
    labelMapping['loan_status'] = 'loan_status'
    labelMapping['id'] = 'id'
    table6 = fieldmap(table2, labelMapping)
    table8 = sort(table6, 'id')
    table10 = cutout(table8, 'id')
    mappings = OrderedDict()
    mappings['id'] = 'id'
    mappings['home_ownership'] = 'ownership', {
        'MORTGAGE': '-1',
        'RENT': '0',
        'OWN': '1'
    }
    mappings['emp_length'] = 'empLength', {'n/a': 0}
    mappings['is_inc_v'] = 'verificationStatus', {
        'Source Verified': 1,
        'Verified': 0,
        'Not Verified': -1
    }
    mappings['pymnt_plan'] = 'paymentPlan', {'n': 0, 'y': 1}
    mappings['initial_list_status'] = 'listStatus', {'f': 0, 'w': 1}
    table3 = fieldmap(table2, mappings)
    table4 = cutout(table2, 'home_ownership', 'is_inc_v', 'pymnt_plan',
                    'initial_list_status', 'term', 'loan_status')
    table5 = merge(table3, table4, key='id')
    table7 = sort(table5, 'id')
    table9 = cutout(table7, 'id')
    featureFileCsv = tocsv(table9, 'featureFileCsv.csv')
    labelsFileCsv = tocsv(table10, 'labelsFileCsv.csv')
    return featureFileCsv, labelsFileCsv

Ejemplo n.º 5

0

Mostrar archivo

Archivo: integrator_refactored.py Proyecto: Pedrotojal/blendle-etl-challenge

def createFacts(events, users):
    try:
        events_uid = etl.cutout(events, 'tracking_id', 'utm_medium', 'utm_campaign')
        events_tui = etl.cutout(events, 'user_id')

        stage_uid = etl.join(users, events_uid, key='user_id')
        stage_tui = etl.join(users, events_tui, key='tracking_id')

        stage_utm = etl.cut(stage_tui, 'user_id', 'utm_medium', 'utm_campaign')
        stage_uid_utm = etl.join(stage_uid, stage_utm, key='user_id')
        stage_m_s = etl.mergesort(stage_uid_utm, stage_tui, key=['created_at', 'email'])

        mappings = OrderedDict()
        mappings['tid'] = 'tracking_id'
        mappings['uid'] = 'user_id'
        mappings['utm_medium'] = 'utm_medium'
        mappings['utm_campaign'] = 'utm_campaign', {'audio': 'none', 'social': 'none'}
        mappings['utm_campaigntype'] = 'utm_campaign'
        mappings['email'] = 'email'
        mappings['subscription'] = 'type'
        mappings['sub_order'] = 'type', {'Signup Completed': '1', 'Trial Started': '2', 'Subscription Started': '3', 'Subscription Ended': '4'}
        mappings['created_at'] = 'created_at'

        # Mapping
        stage_mapping = etl.fieldmap(stage_m_s, mappings)

        # Sort
        stage_mapping_ordered = etl.sort(stage_mapping, key=['created_at', 'email', 'sub_order'])

        # Datetime split
        t1 = etl.split(stage_mapping_ordered, 'created_at', 'T', ['date', 'time'], include_original=True)
        t2 = etl.split(t1, 'date', '-', ['year', 'month', 'day'])
        stage_ready = etl.split(t2, 'time', ':', ['hour', 'minute', 'second'])

        # Export as csv to load folder
        etl.tocsv(stage_ready, 'load/facts.csv')

    except Exception as e:
        print("Something went wrong. Error {0}".format(e))

Ejemplo n.º 6

0

Mostrar archivo

    def export_process(self):
        total = petl.nrows(self.data)
        if self.data and total > 0:
            mappings = OrderedDict()
            mappings['Location'] = 'location'
            mappings['Position'] = 'position', lambda rec: construct_postion(
                rec)
            mappings[
                'Local Time'] = 'time', lambda rec: datetime.fromtimestamp(
                    rec).strftime("%Y-%m-%d %H:%M:%S")
            mappings[
                'Conditions Time'] = 'summary', lambda rec: populate_summary(
                    rec)
            mappings['Temperature'] = 'temperature'
            mappings['Pressure'] = 'pressure'
            mappings['Humidity'] = 'humidity', lambda rec: int(rec * 100)
            self.data = petl.fieldmap(self.data, mappings)
            return True

        else:
            print(
                'Data store doesnt have historic Data. Please run import and export Job'
            )
            return False

Ejemplo n.º 7

0

Mostrar archivo

Archivo: cleanData_Train.py Proyecto: mcsfernando10/Emp-Sense-ETL

with open('src/etl/outputs/attributes.json') as data_file:    
    data = json.load(data_file)

filePath = data['filePath']
dataTable = etl.fromcsv(filePath)

mappings = OrderedDict()
#length of attribtues map
length = len(data['attibutes'])
#map Attributes and clean them
for x in range(length):
    attr = data['attibutes'][x]['attrName']
    matchingField = data['attibutes'][x]['matchingField']
    mappings[attr] = matchingField
    
mappedTable = etl.fieldmap(dataTable, mappings)

cleansedTable = mappedTable
#add rules to clean the table - reversed for give the priority for top attributes
for x in reversed(range(length)):
    attr = data['attibutes'][x]['attrName']
    rules = data['attibutes'][x]['rules']
    rulesListSize = len(rules)
    for y in range(rulesListSize):
        if rules[y] == "Remove Null Value Rows":
            cleansedTable = etl.select(cleansedTable, attr, lambda v: v != '')
        if rules[y] == "Remove Duplicates":
            cleansedTable = etl.aggregate(cleansedTable, attr)
        if rules[y] == "Sort":
            cleansedTable = etl.mergesort(cleansedTable, key=attr)
        if rules[y] == "Number Validation":

Ejemplo n.º 8

0

Mostrar archivo

Archivo: menu_items.py Proyecto: tdelam/g1-etl

def transform(mmj_menu_items, mmj_categories, prices, organization_id,
              source_db, debug):
    """
    Transform data
    """
    # source data table
    source_dt = utils.view_to_list(mmj_menu_items)

    cut_menu_data = [
        'id', 'vendor_id', 'menu_id', 'dispensary_id', 'strain_id',
        'created_at', 'updated_at', 'category_id', 'name', 'sativa', 'indica',
        'on_hold', 'product_type', 'image_file_name', 'medicine_amount',
        'product_type'
    ]

    cut_prices = [
        'menu_item_id', 'dispensary_id', 'price_half_gram', 'price_gram',
        'price_two_gram', 'price_eigth', 'price_quarter', 'price_half',
        'price_ounce'
    ]

    # Cut out all the fields we don't need to load
    menu_items = etl.cut(source_dt, cut_menu_data)
    prices_data = etl.cut(prices, cut_prices)

    menu_items = (etl.addfield(
        menu_items, 'createdAtEpoch').addfield('unitOfMeasure').addfield(
            'locationProductDetails').addfield('keys').addfield('restockLevel')
                  )

    # Two-step transform and cut. First we need to cut the name
    # and id from the source data to map to.
    cut_source_cats = etl.cut(mmj_categories, 'name', 'id', 'measurement')
    source_values = etl.values(cut_source_cats, 'name', 'id')

    # Then we nede a dict of categories to compare against.
    # id is stored to match against when transforming and mapping categories
    mmj_categories = dict([(value, id) for (value, id) in source_values])

    mappings = OrderedDict()
    mappings['id'] = 'id'
    mappings['createdAt'] = 'created_at'
    mappings['updatedAt'] = 'updated_at'
    mappings['createdAtEpoch'] = lambda x: utils.create_epoch(x.created_at)
    mappings['name'] = 'name'
    mappings['shareOnWM'] = lambda x: _wm_integration(x.id, source_db)
    """
    1 = Units
    2 = Grams (weight)
    """
    mappings['unitOfMeasure'] = \
        lambda x: _map_uom(x.category_id, source_db)

    fields = etl.fieldmap(menu_items, mappings)
    data = etl.merge(menu_items, fields, key='id')

    items = []
    for item in etl.dicts(data):

        breakpoint_pricing = (etl.select(
            prices_data,
            lambda x: x.dispensary_id == item['dispensary_id']).rename({
                'price_eigth':
                'price_eighth'
            }).cutout('menu_item_id'))
        # Set image url for load to download
        url = None
        if debug and item['image_file_name'] is not None:
            url = ("https://wm-mmjmenu-images-development.s3."
                   "amazonaws.com/menu_items/images/{0}/large/"
                   "{1}").format(item['id'], item['image_file_name'])
        elif item['image_file_name'] is not None:
            url = ("https://wm-mmjmenu-images-production.s3."
                   "amazonaws.com/menu_items/images/{0}/large/"
                   "{1}").format(item['id'], item['image_file_name'])

        item['image_file_name'] = url

        item['categoryId'] = _map_categories(item['category_id'],
                                             item['sativa'], item['indica'],
                                             mmj_categories, menu_items)
        item['keys'] = {
            'dispensary_id': item['dispensary_id'],
            'id': item['id'],
            'menu_id': item['menu_id'],
            'vendor_id': item['vendor_id'],
            'strain_id': item['strain_id'],
            'category_id': item['category_id']
        }

        # set a default netMJ value if the menu item is a unit product
        if item['unitOfMeasure'] is 2:
            item['netMarijuana'] = int(item['medicine_amount'])

        for key in item['keys'].keys():
            if not item['keys'][key]:
                del item['keys'][key]

        item['locationProductDetails'] = {
            'id': item['id'],
            'active': _active(item['on_hold'])
        }

        item['restockLevel'] = _restock_level(item['dispensary_id'],
                                              item['product_type'], source_db)

        if item['shareOnWM'] is None:
            item['shareOnWM'] = False

        for price in etl.dicts(breakpoint_pricing):
            try:
                price_two_gram = price['price_two_gram']
            except KeyError:
                price_two_gram = 0.0

            item['locationProductDetails']['weightPricing'] = {
                'price_half_gram':
                utils.dollars_to_cents(price['price_half_gram']),
                'price_two_gram': utils.dollars_to_cents(price_two_gram),
                'price_gram': utils.dollars_to_cents(price['price_gram']),
                'price_eighth': utils.dollars_to_cents(price['price_eighth']),
                'price_quarter':
                utils.dollars_to_cents(price['price_quarter']),
                'price_half': utils.dollars_to_cents(price['price_half']),
                'price_ounce': utils.dollars_to_cents(price['price_ounce'])
            }

        del item['vendor_id']
        del item['indica']
        del item['dispensary_id']
        del item['id']
        del item['strain_id']
        del item['on_hold']
        del item['menu_id']
        del item['sativa']
        del item['category_id']
        del item['updated_at']
        del item['created_at']
        del item['product_type']

        if item['image_file_name'] is None:
            del item['image_file_name']

        # set up final structure for API
        items.append(item)

    # Remove inactive items
    for item in items:
        if item['locationProductDetails']['active'] is False:
            items.remove(item)

    if debug:
        result = json.dumps(items,
                            sort_keys=True,
                            indent=4,
                            default=utils.json_serial)
        print(result)

    return items

Ejemplo n.º 9

0

Mostrar archivo

import re
from collections import OrderedDict
import pymysql
import dbconfig

read_db_conn = pymysql.connect(host=dbconfig.db_host,
                               port=dbconfig.db_port,
                               charset="utf8",
                               user=dbconfig.db_user,
                               password=dbconfig.db_pass,
                               db=dbconfig.db_name)

products = etl.fromdb(
    read_db_conn, "SELECT id,name,description FROM {} limit 5000".format(
        dbconfig.db_table_items))


# remove non-alphanumeric character
def cleanString(val):
    nonewline = val.replace('\n', " ")
    return re.sub(r'\W+', ' ', nonewline).lower()


mappings = OrderedDict()
mappings['id'] = 'id'
mappings['item_description'] = lambda val: cleanString(val['name'] + " " + val[
    'description'])

products = etl.fieldmap(products, mappings)
etl.tocsv(products, 'query_result.csv')

Ejemplo n.º 10

0

Mostrar archivo

def transform(source_data, organization_id, debug):
    """
    Load the transformed data into the destination(s)
    """
    # source data table
    source_dt = utils.view_to_list(source_data)

    cut_data = [
        'id', 'dispensary_id', 'picture_file_name', 'name', 'email',
        'address', 'phone_number', 'dob', 'license_type', 'registry_no',
        'membership_id', 'given_caregivership', 'tax_exempt',
        'drivers_license_no', 'points', 'locked_visits',
        'locked_visits_reason', 'caregiver_id', 'picture_file_name',
        'card_expires_at', 'created_at', 'updated_at', 'physician_id',
        'custom_membership_id', 'organization_membership_id', 'city',
        'state', 'zip_code', 'address', 'organization_id'
    ]
    member_data = etl.cut(source_dt, cut_data)

    members = (
        etl
        .addfield(member_data, 'identificationType')
        .addfield('createdAtEpoch')
    )

    member_mapping = OrderedDict()

    member_mapping['id'] = 'id'
    member_mapping['caregiver_id'] = 'caregiver_id'
    member_mapping['dispensary_id'] = 'dispensary_id'
    member_mapping['physician_id'] = 'physician_id'
    member_mapping['custom_membership_id'] = 'custom_membership_id'
    member_mapping['organization_membership_id'] = 'organization_membership_id'
    member_mapping['picture_file_name'] = 'picture_file_name'
    member_mapping['dateOfBirth'] = 'dob'
    member_mapping['name'] = 'name'
    member_mapping['phone_number'] = 'phone_number'
    member_mapping['email'] = 'email'
    member_mapping['organization_id'] = 'organization_id'
    # MEDICAL 1, RECREATIONAL 2
    member_mapping['memberType'] = \
        lambda m: 'MEDICAL' if m.license_type == 1 else 'RECREATIONAL'

    member_mapping['mmjCard'] = 'registry_no'
    member_mapping['isCaregiver'] = \
        lambda x: utils.true_or_false(x.given_caregivership)
    member_mapping['identificationNumber'] = 'drivers_license_no'
    member_mapping['points'] = 'points'
    member_mapping['card_expires_at'] = 'card_expires_at'
    member_mapping['taxExempt'] = lambda x: utils.true_or_false(x.tax_exempt)
    member_mapping['locked_visits'] = 'locked_visits'
    member_mapping['locked_visits_reason'] = 'accountStatusNotes'
    member_mapping['address'] = 'address'
    member_mapping['city'] = 'city'
    member_mapping['zip_code'] = 'zip_code'
    member_mapping['state'] = 'state'
    member_mapping['createdAt'] = 'created_at'
    member_mapping['updatedAt'] = 'updated_at'

    member_mapping['accountStatus'] = \
        lambda x: utils.account_status(x.locked_visits)

    member_fields = etl.fieldmap(members, member_mapping)

    members = []
    for item in etl.dicts(member_fields):
        
        item['keys'] = {
            'id': item['id'],
            'caregiver_id': item['caregiver_id'],
            'dispensary_id': item['dispensary_id'],
            'physician_id': item['physician_id'],
            'custom_membership_id': item['custom_membership_id'],
            'organization_membership_id': item['organization_membership_id'],
            'picture_file_name': item['picture_file_name'],
            'organization_id': item['organization_id'],
        }

        if item['card_expires_at'] is not None:
            item['expiryDate'] = item['card_expires_at']

        # remove any item['keys'] tuples with None values
        for key in item['keys'].keys():
            if not item['keys'][key]:
                del item['keys'][key]

        # set up final structure for API
        item['identificationType'] = 'Drivers License'

        # We may not need this in the data
        item['address'] = [{
            'line1': item['address'],
            'city': item['city'],
            'state': item['state'],
            'zip': item['zip_code'],
        }]

        # replace None value dobs with the epoch beginning of time
        if not item['dateOfBirth']:
            dob = time.strftime('%Y-%m-%d %H:%M:%S.000Z', time.gmtime(0))
            item['dateOfBirth'] = datetime.strptime(dob, '%Y-%m-%d %H:%M:%S.000Z')

        #del item['address']
        del item['city']
        del item['zip_code']
        del item['state']
        del item['dispensary_id']
        del item['id']
        del item['physician_id']
        del item['caregiver_id']
        del item['custom_membership_id']
        del item['organization_membership_id']
        del item['organization_id']
        del item['picture_file_name']
        del item['locked_visits_reason']
        del item['locked_visits']
        del item['card_expires_at']
        
        members.append(item)

    if debug:
        result = json.dumps(members, sort_keys=True,
                            indent=4, default=utils.json_serial)
        print(result)

    return members

Ejemplo n.º 11

0

Mostrar archivo

Archivo: run.py Proyecto: SergeVL/petlsql

def fieldmap_execute(c, fieldmap={}, **kwargs):
    r = c()
    if fieldmap:
        r = etl.fieldmap(r, fieldmap)
    return r

Ejemplo n.º 12

0

Mostrar archivo

Archivo: examples.py Proyecto: aklimchak/petl

          [5, '-', 25, 1.65, 51.9]]

from petl import fieldmap, look
look(table1)
from collections import OrderedDict
mappings = OrderedDict()
# rename a field
mappings['subject_id'] = 'id'
# translate a field
mappings['gender'] = 'sex', {'male': 'M', 'female': 'F'}
# apply a calculation to a field
mappings['age_months'] = 'age', lambda v: v * 12
# apply a calculation to a combination of fields
mappings['bmi'] = lambda rec: rec['weight'] / rec['height']**2 
# transform and inspect the output
table2 = fieldmap(table1, mappings)
look(table2)
# field mappings can also be added and/or updated after the table is created 
# via the suffix notation
table3 = fieldmap(table1)
table3['subject_id'] = 'id'
table3['gender'] = 'sex', {'male': 'M', 'female': 'F'}
table3['age_months'] = 'age', lambda v: v * 12
# use an expression string this time
table3['bmi'] = '{weight} / {height}**2'
look(table3)


# facet

table1 = [['foo', 'bar', 'baz'],

Ejemplo n.º 13

0

Mostrar archivo

def transform(source_data, organization_id, debug):
    """
    Load the transformed data into the destination(s)
    """
    # source data table
    source_dt = utils.view_to_list(source_data)
    cut_data = ['id', 'dispensary_id', 'name', 'email', 'created_at',
                'updated_at', 'address', 'city', 'state', 'country',
                'zip_code', 'website', 'license_no', 'phone_number']
    physician_data = etl.cut(source_dt, cut_data)

    physicians = (
        etl
        .addfield(physician_data, 'createdAtEpoch')
    )

    physician_mapping = OrderedDict()

    physician_mapping['id'] = 'id'
    physician_mapping['dispensary_id'] = 'dispensary_id'
    physician_mapping['name'] = 'name'
    physician_mapping['email'] = 'email'
    physician_mapping['createdAt'] = 'created_at'
    physician_mapping['updatedAt'] = 'updated_at'
    physician_mapping['address'] = 'address'
    physician_mapping['city'] = 'city'
    physician_mapping['state'] = 'state'
    physician_mapping['zip_code'] = 'zip_code'
    physician_mapping['country'] = 'country'
    physician_mapping['verificationWebsite'] = 'website'
    physician_mapping['licenceNumber'] = 'license_no'
    physician_mapping['phone'] = 'phone_number'

    physician_fields = etl.fieldmap(physicians, physician_mapping)

    physicians = []
    for item in etl.dicts(physician_fields):
        item['keys'] = {
            'dispensary_id': item['dispensary_id'],
            'id': item['id']
        }

        # remove any item['keys'] tuples with None values
        for key in item['keys'].keys():
            if not item['keys'][key]:
                del item['keys'][key]

        if item['city'] or item['address'] or item['state'] or item['zip_code'] or item['country']:
            item['address'] = [{
                'line1': item['address'],
                'city': item['city'],
                'state': item['state'],
                'zip': item['zip_code'],
                'country': item['country'],
            }]

        name = re.sub(r'^(Dr(?:.)?s|(?:Dr.?))', '', item['name'], 
                      flags=re.IGNORECASE)

        item['name'] = name.strip()

        if item['phone'] is not None:
            item['phone'] = [{
                'name': 'work',
                'number': item['phone'],
                'default': True
            }]
        else:
            del item['phone']
        
        item['specialty'] = 'General'

        if item['licenceNumber'] is None or item['verificationWebsite'] \
            is None or item['email'] is None:
                del item['licenceNumber']
                del item['verificationWebsite']
                del item['email']

        if item['address'] is None:
            del item['address']

        del item['city']
        del item['zip_code']
        del item['state']
        del item['country']
        del item['dispensary_id']
        del item['id']

        physicians.append(item)

    if debug:
        result = json.dumps(physicians, sort_keys=True,
                            indent=4, default=utils.json_serial)
        print(result)

    return physicians

Ejemplo n.º 14

0

Mostrar archivo

def transform(source_data, organization_id, debug):
    """
    Load the transformed data into the destination(s)
    """
    # source data table
    source_dt = utils.view_to_list(source_data)
    cut_data = [
        'id', 'dispensary_id', 'mmjvenu_id', 'name', 'phone_number', 'email',
        'country', 'state', 'city', 'address', 'zip_code', 'liscense_no',
        'confirmed', 'website'
    ]
    vendor_data = etl.cut(source_dt, cut_data)

    vendor_mappings = OrderedDict()
    vendor_mappings['id'] = 'id'
    vendor_mappings['dispensary_id'] = 'dispensary_id'
    vendor_mappings['address'] = 'address'

    # field renames
    vendor_mappings['accountStatus'] = \
        lambda x: "ACTIVE" if x.confirmed == 1 else "INACTIVE"
    vendor_mappings['phone'] = 'phone_number'
    vendor_mappings['licenceNumber'] = 'liscense_no'
    vendor_mappings['zip'] = 'zip_code'

    vendors_fields = etl.fieldmap(vendor_data, vendor_mappings)
    merged_vendors = etl.merge(vendor_data, vendors_fields, key='id')

    vendors = []
    for item in etl.dicts(merged_vendors):
        if item['address'] is not None:
            item['address'] = {
                'line1': item['address'],
                'line2': None,
                'city': item['city'],
                'state': item['state'],
                'zip': item['zip'],
                'country': item['country'],
            }
        else:
            del item['address']

        if item['licenceNumber'] is None or item['email'] is None or item[
                'website'] is None:
            del item['licenceNumber']
            del item['email']
            del item['website']

        if item['phone'] is not None:
            item['phone'] = [{
                'name': 'business',
                'number': item['phone'],
                'default': True
            }]
        else:
            del item['phone']

        item['keys'] = {
            'dispensary_id': item['dispensary_id'],
            'id': item['id'],
            'mmjvenu_id': item['mmjvenu_id']
        }

        # remove any item['keys'] tuples with None values
        for key in item['keys'].keys():
            if not item['keys'][key]:
                del item['keys'][key]

        # mutate dict and remove fields that are mapped and no longer required
        del item['zip']
        del item['state']
        del item['country']
        del item['city']
        del item['zip_code']
        del item['phone_number']
        del item['confirmed']
        del item['liscense_no']
        # delete fk's
        del item['mmjvenu_id']
        del item['id']
        del item['dispensary_id']

        # set up final structure for API
        vendors.append(item)

    if debug:
        result = json.dumps(vendors,
                            sort_keys=True,
                            indent=4,
                            default=utils.json_serial)
        print(result)

    return vendors

Ejemplo n.º 15

0

Mostrar archivo

Archivo: employees.py Proyecto: tdelam/g1-etl

def transform(mmj_employees, organization_id, debug, fake_email, source_db):
    """
    Load the transformed data into the destination(s)
    """
    # source data table
    source_dt = utils.view_to_list(mmj_employees)
    cut_data = [
        'id', 'email', 'first_name', 'organization_id', 'last_name',
        'created_at', 'updated_at', 'login'
    ]

    employee_data = etl.cut(source_dt, cut_data)

    employees = (etl.addfield(
        employee_data,
        'keys').addfield('name').addfield('role').addfield('dateOfBirth'))

    mappings = OrderedDict()
    mappings['id'] = 'id'
    mappings['name'] = \
        lambda name: _set_name(name.first_name, name.last_name, name.login)
    """
    Roles:
        1 = site-admin
        2 = site-admin
        3 = store-manager
        4 = budtender
    """
    mappings['role'] = lambda x: _assign_role(x.id, source_db)

    mappings['createdAt'] = 'created_at'
    mappings['updatedAt'] = 'updated_at'
    mappings['dateOfBirth'] = \
        lambda _: datetime.datetime(year=1970, month=01,
                                    day=01, hour=02, minute=30)
    mappings['organization_id'] = 'organization_id'  # keep mmj org
    mappings['accountStatus'] = lambda x: _active(x.id, source_db)

    fields = etl.fieldmap(employees, mappings)
    merged_employees = etl.merge(employees, fields, key='id')

    mapped_employees = []
    for item in etl.dicts(merged_employees):
        item['keys'] = {
            'id': item['id'],
            'organization_id': item['organization_id']
        }

        # remove any item['keys'] tuples with None values
        for key in item['keys'].keys():
            if not item['keys'][key]:
                del item['keys'][key]

        item['email'] = _set_email(item['email'], fake_email, debug)

        del item['login']
        del item['first_name']
        del item['last_name']
        del item['created_at']
        del item['id']
        del item['organization_id']
        # set up final structure for API
        mapped_employees.append(item)

    if debug:
        result = json.dumps(mapped_employees,
                            sort_keys=True,
                            indent=4,
                            default=utils.json_serial)
        print(result)

    return mapped_employees

Ejemplo n.º 16

0

Mostrar archivo

Archivo: maps.py Proyecto: zli69/petl

          [1, 'male', 16, 1.45, 62.0],
          [2, 'female', 19, 1.34, 55.4],
          [3, 'female', 17, 1.78, 74.4],
          [4, 'male', 21, 1.33, 45.2],
          [5, '-', 25, 1.65, 51.9]]
mappings = OrderedDict()
# rename a field
mappings['subject_id'] = 'id'
# translate a field
mappings['gender'] = 'sex', {'male': 'M', 'female': 'F'}
# apply a calculation to a field
mappings['age_months'] = 'age', lambda v: v * 12
# apply a calculation to a combination of fields
mappings['bmi'] = lambda rec: rec['weight'] / rec['height']**2
# transform and inspect the output
table2 = etl.fieldmap(table1, mappings)
table2


# rowmap()
##########


import petl as etl
table1 = [['id', 'sex', 'age', 'height', 'weight'],
          [1, 'male', 16, 1.45, 62.0],
          [2, 'female', 19, 1.34, 55.4],
          [3, 'female', 17, 1.78, 74.4],
          [4, 'male', 21, 1.33, 45.2],
          [5, '-', 25, 1.65, 51.9]]
def rowmapper(row):

Ejemplo n.º 17

0

Mostrar archivo

def transform(dispensary_details, pricing, organization_id, debug, source_db):
    """
    Load the transformed data into the destination(s)
    """
    # source data table
    general_settings = utils.view_to_list(dispensary_details)
    pricing_detail = utils.view_to_list(pricing)

    dispensary_cut_data = [
        'id', 'dispensary_id', 'menu_show_tax', 'logo_file_name',
        'inactivity_logout', 'calculate_even_totals',
        'require_customer_referrer', 'membership_fee_enabled', 'pp_enabled',
        'pp_global_dollars_to_points', 'pp_global_points_to_dollars',
        'pp_points_per_referral', 'allow_unpaid_visits', 'red_flags_enabled',
        'mmjrevu_api_key'
    ]

    pricing_cut_data = [
        'id', 'price_half_gram', 'price_gram', 'price_two_gram', 'price_eigth',
        'price_quarter', 'price_half', 'price_ounce'
    ]

    dispensary_settings_data = etl.cut(general_settings, dispensary_cut_data)
    pricing_data = etl.cut(pricing_detail, pricing_cut_data)

    settings = (etl.addfield(dispensary_settings_data, 'organizationId'))

    mappings = OrderedDict()
    mappings['id'] = 'id'

    # field renames
    mappings['organizationId'] = organization_id

    settings_fields = etl.fieldmap(settings, mappings)
    merged_settings = (
        etl.merge(settings, settings_fields, key='id').rename({
            # Global -> General -> SESSION TIMEOUT DURATION
            'inactivity_logout':
            'sessionTimeoutDuration',
            # Global -> Logo
            'logo_file_name':
            'image',
            # Global -> Members -> Membership Level
            'membership_fee_enabled':
            'membershipLevelsEnabled',
            'pp_global_dollars_to_points':
            'dollarsPerPoint',
            'pp_global_points_to_dollars':
            'pointsPerDollar',
            'pp_points_per_referral':
            'referralPoints',

            # <Location> -> Sales -> TAXES IN
            'menu_show_tax':
            'enableTaxesIn',
            # <Location> -> Sales -> PRICE ROUNDING
            'calculate_even_totals':
            'hasPriceRounding',
            # <Location> -> Members -> REFERRER REQUIRED
            'require_customer_referrer':
            'mandatoryReferral',
            # <Location> -> Members -> PAID VISITS
            'allow_unpaid_visits':
            'paidVisitsEnabled',
            # <Location> -> Members -> MEDICAL MEMBERS
            'red_flags_enabled':
            'hasLimits',
            # <Location> -> General -> STORE LOCATIONS
            'mmjrevu_api_key':
            'apiKey'
        }))
    settings = {}
    for item in etl.dicts(merged_settings):
        item['keys'] = {
            'dispensary_id': item['dispensary_id'],
            'id': item['id']
        }

        # if not item['sessionTimeoutDuration'] >= 30:
        #     del item['sessionTimeoutDuration']

        url = None
        if debug and item['image'] is not None:
            url = ("https://wm-mmjmenu-images-development.s3."
                   "amazonaws.com/logos/{0}/original/"
                   "{1}").format(item['id'], item['image'])
        elif item['image'] is not None:
            url = ("https://wm-mmjmenu-images-production.s3."
                   "amazonaws.com/logos/{0}/original/"
                   "{1}").format(item['id'], item['image'])

        item['image'] = url

        # remove any item['keys'] tuples with None values
        for key in item['keys'].keys():
            if not item['keys'][key]:
                del item['keys'][key]
        """
        Member settings nested - crm.member.settings
        """
        if item['pp_enabled']:
            item['crm_member_settings'] = {}
            item['crm_member_settings']['membershipLevel'] = {
                'membershipLevelsEnabled': \
                    utils.true_or_false(item['membershipLevelsEnabled']),
                'levelName': 'Unnamed',
                'dollarsPerPoint': item['dollarsPerPoint'],
                'pointsPerDollar': item['pointsPerDollar'],
                'referralPoints': item['referralPoints']
            }
        """
        Location settings nested.
        """
        if item['apiKey']:
            item['location_specific'] = {'apiKey': item['apiKey']}
        else:
            item['location_specific'] = {}

        item['location_specific']['members'] = {
            'paidVisitsEnabled':
            utils.true_or_false(item['paidVisitsEnabled']),
            'mandatoryReferral': utils.true_or_false(item['mandatoryReferral'])
        }
        item['location_specific']['sales'] = {
            'enableTaxesIn': utils.true_or_false(item['enableTaxesIn']),
            'hasPriceRounding': utils.true_or_false(item['hasPriceRounding'])
        }

        # sales.settings.taxes
        item['sales_settings_taxes'] = {}
        for tax in _get_taxes(item['dispensary_id'], source_db):
            item['sales_settings_taxes']['taxes'] = {
                'code': tax['name'],
                'percent': tax['amount'] / 100,
                'type': 'sales'
            }

        for pricing in etl.dicts(pricing_data):
            item['location_specific']['inventory'] = {}
            item['location_specific']['inventory']['weightPricing'] = {
                'name': 'Default',
                'defaultTier': True
            }
            item['location_specific']['inventory']['weightPricing'][
                'breakpoints'] = {
                    'price_half_gram':
                    utils.dollars_to_cents(pricing['price_half_gram']),
                    'price_gram':
                    utils.dollars_to_cents(pricing['price_gram']),
                    'price_two_gram':
                    utils.dollars_to_cents(pricing['price_two_gram']),
                    'price_eighth':
                    utils.dollars_to_cents(pricing['price_eigth']),
                    'price_quarter':
                    utils.dollars_to_cents(pricing['price_quarter']),
                    'price_half':
                    utils.dollars_to_cents(pricing['price_half']),
                    'price_ounce':
                    utils.dollars_to_cents(pricing['price_ounce']),
                }

        # monthly purchase limit is two week limit x2
        if item['hasLimits'] == 1:
            for limits in _medical_limits(item['dispensary_id'], source_db):
                item['location_specific']['members']['medicalLimits'] = {
                    'hasLimits': True,
                    'dailyPurchaseLimit': int(limits['daily_purchase_limit']),
                    'visitPurchaseLimit': int(limits['visit_purchase_limit']),
                    'dailyVisitLimit': int(limits['daily_visit_limit']),
                    'monthlyPurchaseLimit': \
                        int(limits['two_week_purchase_limit'] * 2)
                }

        if item['image'] is None or item['apiKey'] is None:
            del item['image']
            del item['apiKey']

        # delete fk's
        del item['id']
        del item['dispensary_id']
        del item['membershipLevelsEnabled']
        del item['enableTaxesIn']
        del item['hasLimits']
        del item['hasPriceRounding']
        del item['dollarsPerPoint']
        del item['mandatoryReferral']
        del item['paidVisitsEnabled']
        del item['pointsPerDollar']
        del item['pp_enabled']
        del item['referralPoints']

        # set up final structure for API
        settings.update(item)

    if debug:
        result = json.dumps(settings,
                            sort_keys=True,
                            indent=4,
                            default=utils.json_serial)
        print(result)
    return settings

Ejemplo n.º 18

0

Mostrar archivo

Archivo: integrator.py Proyecto: Pedrotojal/blendle-etl-challenge

mappings['uid'] = 'user_id'
mappings['utm_medium'] = 'utm_medium'
mappings['utm_campaign'] = 'utm_campaign', {'audio': 'none', 'social': 'none'}
mappings['utm_campaign_type'] = 'utm_campaign'
mappings['email'] = 'email'
mappings['subscription'] = 'type'
mappings['sub_order'] = 'type', {
    'Signup Completed': '1',
    'Trial Started': '2',
    'Subscription Started': '3',
    'Subscription Ended': '4'
}
mappings['created_at'] = 'created_at'

# Mapping
stage_mapping = etl.fieldmap(stage_m_s, mappings)

# Sort
stage_mapping_ordered = etl.sort(stage_mapping,
                                 key=['created_at', 'email', 'sub_order'])

# Datetime split
t1 = etl.split(stage_mapping_ordered,
               'created_at',
               'T', ['date', 'time'],
               include_original=True)
t2 = etl.split(t1, 'date', '-', ['year', 'month', 'day'])
stage_ready = etl.split(t2, 'time', ':', ['hour', 'minute', 'second'])

# Export as csv to load folder
etl.tocsv(stage_ready, 'load/facts.csv')

Ejemplo n.º 19

0

Mostrar archivo

Archivo: examples.py Proyecto: datamade/petl

          [5, '-', 25, 1.65, 51.9]]

from petl import fieldmap, look
look(table1)
from collections import OrderedDict
mappings = OrderedDict()
# rename a field
mappings['subject_id'] = 'id'
# translate a field
mappings['gender'] = 'sex', {'male': 'M', 'female': 'F'}
# apply a calculation to a field
mappings['age_months'] = 'age', lambda v: v * 12
# apply a calculation to a combination of fields
mappings['bmi'] = lambda rec: rec['weight'] / rec['height']**2 
# transform and inspect the output
table2 = fieldmap(table1, mappings)
look(table2)
# field mappings can also be added and/or updated after the table is created 
# via the suffix notation
table3 = fieldmap(table1)
table3['subject_id'] = 'id'
table3['gender'] = 'sex', {'male': 'M', 'female': 'F'}
table3['age_months'] = 'age', lambda v: v * 12
# use an expression string this time
table3['bmi'] = '{weight} / {height}**2'
look(table3)


# facet

table1 = [['foo', 'bar', 'baz'],

Ejemplo n.º 20

0

Mostrar archivo

Archivo: retriever.py Proyecto: luthfihariz/similiar-item-cosine

import petl as etl
import re
from collections import OrderedDict
import pymysql
import dbconfig

read_db_conn = pymysql.connect(host=dbconfig.db_host,
                              port=dbconfig.db_port,
                              charset="utf8",
                              user=dbconfig.db_user,
                              password=dbconfig.db_pass,
                              db=dbconfig.db_name)


products = etl.fromdb(read_db_conn, "SELECT id,name,description FROM {} limit 5000".format(dbconfig.db_table_items))

# remove non-alphanumeric character
def cleanString(val):
    nonewline = val.replace('\n'," ")
    return re.sub(r'\W+', ' ', nonewline).lower()

mappings = OrderedDict()
mappings['id'] = 'id'
mappings['item_description'] = lambda val : cleanString(val['name'] + " " +val['description'])

products = etl.fieldmap(products, mappings)
etl.tocsv(products, 'query_result.csv')