def table_execute(view, **kwargs): r = iter(view) #@ if 'row_number' in kwargs: r = etl.addrownumbers(r, field=kwargs['row_number']) if 'fieldmap' in kwargs: r = etl.fieldmap(r, kwargs['fieldmap']) return r
def transform_characters_data(characters_page, url_to_name_map): transformations = OrderedDict() for field in settings.STAR_WARS_CHARACTERS_BASE_FIELDS: transformations[field] = field transformations['date'] = 'edited', lambda v: v[:10] # the width of the # date is always the same transformations['homeworld'] = 'homeworld', lambda v: url_to_name_map[v] return etl.fieldmap(characters_page, transformations)
def test_sync_users(src_conn, dest_conn): src_table = etl.fromdb(src_conn, 'select * from auth_user limit 8') mapping = OrderedDict() mapping['org_id'] = lambda x: 1 mapping['username'] = '******' mapping['name'] = 'full_name' mapping['status'] = lambda x: 'ACTIVE' mapping['uid'] = 'username' mapping['type'] = lambda x: "STUDENT" dst_table = etl.fieldmap(src_table, mapping) upsert_many(dest_conn['auth_user'], dst_table, keys=['username'])
def dataPreProcessing(fileName): inputData = fromcsv(fileName) table1 = cutout(inputData, 'member_id', 'grade', 'sub_grade', 'emp_title', 'url', 'desc', 'title', 'accept_d', 'exp_d', 'list_d', 'issue_d', 'purpose', 'addr_city', 'addr_state', 'earliest_cr_line', 'last_pymnt_d', 'next_pymnt_d', 'last_credit_pull_d') table2 = select( table1, lambda i: i['term'] == ' 36 months' and i['loan_status'] is not "") labelMapping = OrderedDict() labelMapping['loan_status'] = 'loan_status' labelMapping['id'] = 'id' table6 = fieldmap(table2, labelMapping) table8 = sort(table6, 'id') table10 = cutout(table8, 'id') mappings = OrderedDict() mappings['id'] = 'id' mappings['home_ownership'] = 'ownership', { 'MORTGAGE': '-1', 'RENT': '0', 'OWN': '1' } mappings['emp_length'] = 'empLength', {'n/a': 0} mappings['is_inc_v'] = 'verificationStatus', { 'Source Verified': 1, 'Verified': 0, 'Not Verified': -1 } mappings['pymnt_plan'] = 'paymentPlan', {'n': 0, 'y': 1} mappings['initial_list_status'] = 'listStatus', {'f': 0, 'w': 1} table3 = fieldmap(table2, mappings) table4 = cutout(table2, 'home_ownership', 'is_inc_v', 'pymnt_plan', 'initial_list_status', 'term', 'loan_status') table5 = merge(table3, table4, key='id') table7 = sort(table5, 'id') table9 = cutout(table7, 'id') featureFileCsv = tocsv(table9, 'featureFileCsv.csv') labelsFileCsv = tocsv(table10, 'labelsFileCsv.csv') return featureFileCsv, labelsFileCsv
def createFacts(events, users): try: events_uid = etl.cutout(events, 'tracking_id', 'utm_medium', 'utm_campaign') events_tui = etl.cutout(events, 'user_id') stage_uid = etl.join(users, events_uid, key='user_id') stage_tui = etl.join(users, events_tui, key='tracking_id') stage_utm = etl.cut(stage_tui, 'user_id', 'utm_medium', 'utm_campaign') stage_uid_utm = etl.join(stage_uid, stage_utm, key='user_id') stage_m_s = etl.mergesort(stage_uid_utm, stage_tui, key=['created_at', 'email']) mappings = OrderedDict() mappings['tid'] = 'tracking_id' mappings['uid'] = 'user_id' mappings['utm_medium'] = 'utm_medium' mappings['utm_campaign'] = 'utm_campaign', {'audio': 'none', 'social': 'none'} mappings['utm_campaigntype'] = 'utm_campaign' mappings['email'] = 'email' mappings['subscription'] = 'type' mappings['sub_order'] = 'type', {'Signup Completed': '1', 'Trial Started': '2', 'Subscription Started': '3', 'Subscription Ended': '4'} mappings['created_at'] = 'created_at' # Mapping stage_mapping = etl.fieldmap(stage_m_s, mappings) # Sort stage_mapping_ordered = etl.sort(stage_mapping, key=['created_at', 'email', 'sub_order']) # Datetime split t1 = etl.split(stage_mapping_ordered, 'created_at', 'T', ['date', 'time'], include_original=True) t2 = etl.split(t1, 'date', '-', ['year', 'month', 'day']) stage_ready = etl.split(t2, 'time', ':', ['hour', 'minute', 'second']) # Export as csv to load folder etl.tocsv(stage_ready, 'load/facts.csv') except Exception as e: print("Something went wrong. Error {0}".format(e))
def export_process(self): total = petl.nrows(self.data) if self.data and total > 0: mappings = OrderedDict() mappings['Location'] = 'location' mappings['Position'] = 'position', lambda rec: construct_postion( rec) mappings[ 'Local Time'] = 'time', lambda rec: datetime.fromtimestamp( rec).strftime("%Y-%m-%d %H:%M:%S") mappings[ 'Conditions Time'] = 'summary', lambda rec: populate_summary( rec) mappings['Temperature'] = 'temperature' mappings['Pressure'] = 'pressure' mappings['Humidity'] = 'humidity', lambda rec: int(rec * 100) self.data = petl.fieldmap(self.data, mappings) return True else: print( 'Data store doesnt have historic Data. Please run import and export Job' ) return False
with open('src/etl/outputs/attributes.json') as data_file: data = json.load(data_file) filePath = data['filePath'] dataTable = etl.fromcsv(filePath) mappings = OrderedDict() #length of attribtues map length = len(data['attibutes']) #map Attributes and clean them for x in range(length): attr = data['attibutes'][x]['attrName'] matchingField = data['attibutes'][x]['matchingField'] mappings[attr] = matchingField mappedTable = etl.fieldmap(dataTable, mappings) cleansedTable = mappedTable #add rules to clean the table - reversed for give the priority for top attributes for x in reversed(range(length)): attr = data['attibutes'][x]['attrName'] rules = data['attibutes'][x]['rules'] rulesListSize = len(rules) for y in range(rulesListSize): if rules[y] == "Remove Null Value Rows": cleansedTable = etl.select(cleansedTable, attr, lambda v: v != '') if rules[y] == "Remove Duplicates": cleansedTable = etl.aggregate(cleansedTable, attr) if rules[y] == "Sort": cleansedTable = etl.mergesort(cleansedTable, key=attr) if rules[y] == "Number Validation":
def transform(mmj_menu_items, mmj_categories, prices, organization_id, source_db, debug): """ Transform data """ # source data table source_dt = utils.view_to_list(mmj_menu_items) cut_menu_data = [ 'id', 'vendor_id', 'menu_id', 'dispensary_id', 'strain_id', 'created_at', 'updated_at', 'category_id', 'name', 'sativa', 'indica', 'on_hold', 'product_type', 'image_file_name', 'medicine_amount', 'product_type' ] cut_prices = [ 'menu_item_id', 'dispensary_id', 'price_half_gram', 'price_gram', 'price_two_gram', 'price_eigth', 'price_quarter', 'price_half', 'price_ounce' ] # Cut out all the fields we don't need to load menu_items = etl.cut(source_dt, cut_menu_data) prices_data = etl.cut(prices, cut_prices) menu_items = (etl.addfield( menu_items, 'createdAtEpoch').addfield('unitOfMeasure').addfield( 'locationProductDetails').addfield('keys').addfield('restockLevel') ) # Two-step transform and cut. First we need to cut the name # and id from the source data to map to. cut_source_cats = etl.cut(mmj_categories, 'name', 'id', 'measurement') source_values = etl.values(cut_source_cats, 'name', 'id') # Then we nede a dict of categories to compare against. # id is stored to match against when transforming and mapping categories mmj_categories = dict([(value, id) for (value, id) in source_values]) mappings = OrderedDict() mappings['id'] = 'id' mappings['createdAt'] = 'created_at' mappings['updatedAt'] = 'updated_at' mappings['createdAtEpoch'] = lambda x: utils.create_epoch(x.created_at) mappings['name'] = 'name' mappings['shareOnWM'] = lambda x: _wm_integration(x.id, source_db) """ 1 = Units 2 = Grams (weight) """ mappings['unitOfMeasure'] = \ lambda x: _map_uom(x.category_id, source_db) fields = etl.fieldmap(menu_items, mappings) data = etl.merge(menu_items, fields, key='id') items = [] for item in etl.dicts(data): breakpoint_pricing = (etl.select( prices_data, lambda x: x.dispensary_id == item['dispensary_id']).rename({ 'price_eigth': 'price_eighth' }).cutout('menu_item_id')) # Set image url for load to download url = None if debug and item['image_file_name'] is not None: url = ("https://wm-mmjmenu-images-development.s3." "amazonaws.com/menu_items/images/{0}/large/" "{1}").format(item['id'], item['image_file_name']) elif item['image_file_name'] is not None: url = ("https://wm-mmjmenu-images-production.s3." "amazonaws.com/menu_items/images/{0}/large/" "{1}").format(item['id'], item['image_file_name']) item['image_file_name'] = url item['categoryId'] = _map_categories(item['category_id'], item['sativa'], item['indica'], mmj_categories, menu_items) item['keys'] = { 'dispensary_id': item['dispensary_id'], 'id': item['id'], 'menu_id': item['menu_id'], 'vendor_id': item['vendor_id'], 'strain_id': item['strain_id'], 'category_id': item['category_id'] } # set a default netMJ value if the menu item is a unit product if item['unitOfMeasure'] is 2: item['netMarijuana'] = int(item['medicine_amount']) for key in item['keys'].keys(): if not item['keys'][key]: del item['keys'][key] item['locationProductDetails'] = { 'id': item['id'], 'active': _active(item['on_hold']) } item['restockLevel'] = _restock_level(item['dispensary_id'], item['product_type'], source_db) if item['shareOnWM'] is None: item['shareOnWM'] = False for price in etl.dicts(breakpoint_pricing): try: price_two_gram = price['price_two_gram'] except KeyError: price_two_gram = 0.0 item['locationProductDetails']['weightPricing'] = { 'price_half_gram': utils.dollars_to_cents(price['price_half_gram']), 'price_two_gram': utils.dollars_to_cents(price_two_gram), 'price_gram': utils.dollars_to_cents(price['price_gram']), 'price_eighth': utils.dollars_to_cents(price['price_eighth']), 'price_quarter': utils.dollars_to_cents(price['price_quarter']), 'price_half': utils.dollars_to_cents(price['price_half']), 'price_ounce': utils.dollars_to_cents(price['price_ounce']) } del item['vendor_id'] del item['indica'] del item['dispensary_id'] del item['id'] del item['strain_id'] del item['on_hold'] del item['menu_id'] del item['sativa'] del item['category_id'] del item['updated_at'] del item['created_at'] del item['product_type'] if item['image_file_name'] is None: del item['image_file_name'] # set up final structure for API items.append(item) # Remove inactive items for item in items: if item['locationProductDetails']['active'] is False: items.remove(item) if debug: result = json.dumps(items, sort_keys=True, indent=4, default=utils.json_serial) print(result) return items
import re from collections import OrderedDict import pymysql import dbconfig read_db_conn = pymysql.connect(host=dbconfig.db_host, port=dbconfig.db_port, charset="utf8", user=dbconfig.db_user, password=dbconfig.db_pass, db=dbconfig.db_name) products = etl.fromdb( read_db_conn, "SELECT id,name,description FROM {} limit 5000".format( dbconfig.db_table_items)) # remove non-alphanumeric character def cleanString(val): nonewline = val.replace('\n', " ") return re.sub(r'\W+', ' ', nonewline).lower() mappings = OrderedDict() mappings['id'] = 'id' mappings['item_description'] = lambda val: cleanString(val['name'] + " " + val[ 'description']) products = etl.fieldmap(products, mappings) etl.tocsv(products, 'query_result.csv')
def transform(source_data, organization_id, debug): """ Load the transformed data into the destination(s) """ # source data table source_dt = utils.view_to_list(source_data) cut_data = [ 'id', 'dispensary_id', 'picture_file_name', 'name', 'email', 'address', 'phone_number', 'dob', 'license_type', 'registry_no', 'membership_id', 'given_caregivership', 'tax_exempt', 'drivers_license_no', 'points', 'locked_visits', 'locked_visits_reason', 'caregiver_id', 'picture_file_name', 'card_expires_at', 'created_at', 'updated_at', 'physician_id', 'custom_membership_id', 'organization_membership_id', 'city', 'state', 'zip_code', 'address', 'organization_id' ] member_data = etl.cut(source_dt, cut_data) members = ( etl .addfield(member_data, 'identificationType') .addfield('createdAtEpoch') ) member_mapping = OrderedDict() member_mapping['id'] = 'id' member_mapping['caregiver_id'] = 'caregiver_id' member_mapping['dispensary_id'] = 'dispensary_id' member_mapping['physician_id'] = 'physician_id' member_mapping['custom_membership_id'] = 'custom_membership_id' member_mapping['organization_membership_id'] = 'organization_membership_id' member_mapping['picture_file_name'] = 'picture_file_name' member_mapping['dateOfBirth'] = 'dob' member_mapping['name'] = 'name' member_mapping['phone_number'] = 'phone_number' member_mapping['email'] = 'email' member_mapping['organization_id'] = 'organization_id' # MEDICAL 1, RECREATIONAL 2 member_mapping['memberType'] = \ lambda m: 'MEDICAL' if m.license_type == 1 else 'RECREATIONAL' member_mapping['mmjCard'] = 'registry_no' member_mapping['isCaregiver'] = \ lambda x: utils.true_or_false(x.given_caregivership) member_mapping['identificationNumber'] = 'drivers_license_no' member_mapping['points'] = 'points' member_mapping['card_expires_at'] = 'card_expires_at' member_mapping['taxExempt'] = lambda x: utils.true_or_false(x.tax_exempt) member_mapping['locked_visits'] = 'locked_visits' member_mapping['locked_visits_reason'] = 'accountStatusNotes' member_mapping['address'] = 'address' member_mapping['city'] = 'city' member_mapping['zip_code'] = 'zip_code' member_mapping['state'] = 'state' member_mapping['createdAt'] = 'created_at' member_mapping['updatedAt'] = 'updated_at' member_mapping['accountStatus'] = \ lambda x: utils.account_status(x.locked_visits) member_fields = etl.fieldmap(members, member_mapping) members = [] for item in etl.dicts(member_fields): item['keys'] = { 'id': item['id'], 'caregiver_id': item['caregiver_id'], 'dispensary_id': item['dispensary_id'], 'physician_id': item['physician_id'], 'custom_membership_id': item['custom_membership_id'], 'organization_membership_id': item['organization_membership_id'], 'picture_file_name': item['picture_file_name'], 'organization_id': item['organization_id'], } if item['card_expires_at'] is not None: item['expiryDate'] = item['card_expires_at'] # remove any item['keys'] tuples with None values for key in item['keys'].keys(): if not item['keys'][key]: del item['keys'][key] # set up final structure for API item['identificationType'] = 'Drivers License' # We may not need this in the data item['address'] = [{ 'line1': item['address'], 'city': item['city'], 'state': item['state'], 'zip': item['zip_code'], }] # replace None value dobs with the epoch beginning of time if not item['dateOfBirth']: dob = time.strftime('%Y-%m-%d %H:%M:%S.000Z', time.gmtime(0)) item['dateOfBirth'] = datetime.strptime(dob, '%Y-%m-%d %H:%M:%S.000Z') #del item['address'] del item['city'] del item['zip_code'] del item['state'] del item['dispensary_id'] del item['id'] del item['physician_id'] del item['caregiver_id'] del item['custom_membership_id'] del item['organization_membership_id'] del item['organization_id'] del item['picture_file_name'] del item['locked_visits_reason'] del item['locked_visits'] del item['card_expires_at'] members.append(item) if debug: result = json.dumps(members, sort_keys=True, indent=4, default=utils.json_serial) print(result) return members
def fieldmap_execute(c, fieldmap={}, **kwargs): r = c() if fieldmap: r = etl.fieldmap(r, fieldmap) return r
[5, '-', 25, 1.65, 51.9]] from petl import fieldmap, look look(table1) from collections import OrderedDict mappings = OrderedDict() # rename a field mappings['subject_id'] = 'id' # translate a field mappings['gender'] = 'sex', {'male': 'M', 'female': 'F'} # apply a calculation to a field mappings['age_months'] = 'age', lambda v: v * 12 # apply a calculation to a combination of fields mappings['bmi'] = lambda rec: rec['weight'] / rec['height']**2 # transform and inspect the output table2 = fieldmap(table1, mappings) look(table2) # field mappings can also be added and/or updated after the table is created # via the suffix notation table3 = fieldmap(table1) table3['subject_id'] = 'id' table3['gender'] = 'sex', {'male': 'M', 'female': 'F'} table3['age_months'] = 'age', lambda v: v * 12 # use an expression string this time table3['bmi'] = '{weight} / {height}**2' look(table3) # facet table1 = [['foo', 'bar', 'baz'],
def transform(source_data, organization_id, debug): """ Load the transformed data into the destination(s) """ # source data table source_dt = utils.view_to_list(source_data) cut_data = ['id', 'dispensary_id', 'name', 'email', 'created_at', 'updated_at', 'address', 'city', 'state', 'country', 'zip_code', 'website', 'license_no', 'phone_number'] physician_data = etl.cut(source_dt, cut_data) physicians = ( etl .addfield(physician_data, 'createdAtEpoch') ) physician_mapping = OrderedDict() physician_mapping['id'] = 'id' physician_mapping['dispensary_id'] = 'dispensary_id' physician_mapping['name'] = 'name' physician_mapping['email'] = 'email' physician_mapping['createdAt'] = 'created_at' physician_mapping['updatedAt'] = 'updated_at' physician_mapping['address'] = 'address' physician_mapping['city'] = 'city' physician_mapping['state'] = 'state' physician_mapping['zip_code'] = 'zip_code' physician_mapping['country'] = 'country' physician_mapping['verificationWebsite'] = 'website' physician_mapping['licenceNumber'] = 'license_no' physician_mapping['phone'] = 'phone_number' physician_fields = etl.fieldmap(physicians, physician_mapping) physicians = [] for item in etl.dicts(physician_fields): item['keys'] = { 'dispensary_id': item['dispensary_id'], 'id': item['id'] } # remove any item['keys'] tuples with None values for key in item['keys'].keys(): if not item['keys'][key]: del item['keys'][key] if item['city'] or item['address'] or item['state'] or item['zip_code'] or item['country']: item['address'] = [{ 'line1': item['address'], 'city': item['city'], 'state': item['state'], 'zip': item['zip_code'], 'country': item['country'], }] name = re.sub(r'^(Dr(?:.)?s|(?:Dr.?))', '', item['name'], flags=re.IGNORECASE) item['name'] = name.strip() if item['phone'] is not None: item['phone'] = [{ 'name': 'work', 'number': item['phone'], 'default': True }] else: del item['phone'] item['specialty'] = 'General' if item['licenceNumber'] is None or item['verificationWebsite'] \ is None or item['email'] is None: del item['licenceNumber'] del item['verificationWebsite'] del item['email'] if item['address'] is None: del item['address'] del item['city'] del item['zip_code'] del item['state'] del item['country'] del item['dispensary_id'] del item['id'] physicians.append(item) if debug: result = json.dumps(physicians, sort_keys=True, indent=4, default=utils.json_serial) print(result) return physicians
def transform(source_data, organization_id, debug): """ Load the transformed data into the destination(s) """ # source data table source_dt = utils.view_to_list(source_data) cut_data = [ 'id', 'dispensary_id', 'mmjvenu_id', 'name', 'phone_number', 'email', 'country', 'state', 'city', 'address', 'zip_code', 'liscense_no', 'confirmed', 'website' ] vendor_data = etl.cut(source_dt, cut_data) vendor_mappings = OrderedDict() vendor_mappings['id'] = 'id' vendor_mappings['dispensary_id'] = 'dispensary_id' vendor_mappings['address'] = 'address' # field renames vendor_mappings['accountStatus'] = \ lambda x: "ACTIVE" if x.confirmed == 1 else "INACTIVE" vendor_mappings['phone'] = 'phone_number' vendor_mappings['licenceNumber'] = 'liscense_no' vendor_mappings['zip'] = 'zip_code' vendors_fields = etl.fieldmap(vendor_data, vendor_mappings) merged_vendors = etl.merge(vendor_data, vendors_fields, key='id') vendors = [] for item in etl.dicts(merged_vendors): if item['address'] is not None: item['address'] = { 'line1': item['address'], 'line2': None, 'city': item['city'], 'state': item['state'], 'zip': item['zip'], 'country': item['country'], } else: del item['address'] if item['licenceNumber'] is None or item['email'] is None or item[ 'website'] is None: del item['licenceNumber'] del item['email'] del item['website'] if item['phone'] is not None: item['phone'] = [{ 'name': 'business', 'number': item['phone'], 'default': True }] else: del item['phone'] item['keys'] = { 'dispensary_id': item['dispensary_id'], 'id': item['id'], 'mmjvenu_id': item['mmjvenu_id'] } # remove any item['keys'] tuples with None values for key in item['keys'].keys(): if not item['keys'][key]: del item['keys'][key] # mutate dict and remove fields that are mapped and no longer required del item['zip'] del item['state'] del item['country'] del item['city'] del item['zip_code'] del item['phone_number'] del item['confirmed'] del item['liscense_no'] # delete fk's del item['mmjvenu_id'] del item['id'] del item['dispensary_id'] # set up final structure for API vendors.append(item) if debug: result = json.dumps(vendors, sort_keys=True, indent=4, default=utils.json_serial) print(result) return vendors
def transform(mmj_employees, organization_id, debug, fake_email, source_db): """ Load the transformed data into the destination(s) """ # source data table source_dt = utils.view_to_list(mmj_employees) cut_data = [ 'id', 'email', 'first_name', 'organization_id', 'last_name', 'created_at', 'updated_at', 'login' ] employee_data = etl.cut(source_dt, cut_data) employees = (etl.addfield( employee_data, 'keys').addfield('name').addfield('role').addfield('dateOfBirth')) mappings = OrderedDict() mappings['id'] = 'id' mappings['name'] = \ lambda name: _set_name(name.first_name, name.last_name, name.login) """ Roles: 1 = site-admin 2 = site-admin 3 = store-manager 4 = budtender """ mappings['role'] = lambda x: _assign_role(x.id, source_db) mappings['createdAt'] = 'created_at' mappings['updatedAt'] = 'updated_at' mappings['dateOfBirth'] = \ lambda _: datetime.datetime(year=1970, month=01, day=01, hour=02, minute=30) mappings['organization_id'] = 'organization_id' # keep mmj org mappings['accountStatus'] = lambda x: _active(x.id, source_db) fields = etl.fieldmap(employees, mappings) merged_employees = etl.merge(employees, fields, key='id') mapped_employees = [] for item in etl.dicts(merged_employees): item['keys'] = { 'id': item['id'], 'organization_id': item['organization_id'] } # remove any item['keys'] tuples with None values for key in item['keys'].keys(): if not item['keys'][key]: del item['keys'][key] item['email'] = _set_email(item['email'], fake_email, debug) del item['login'] del item['first_name'] del item['last_name'] del item['created_at'] del item['id'] del item['organization_id'] # set up final structure for API mapped_employees.append(item) if debug: result = json.dumps(mapped_employees, sort_keys=True, indent=4, default=utils.json_serial) print(result) return mapped_employees
[1, 'male', 16, 1.45, 62.0], [2, 'female', 19, 1.34, 55.4], [3, 'female', 17, 1.78, 74.4], [4, 'male', 21, 1.33, 45.2], [5, '-', 25, 1.65, 51.9]] mappings = OrderedDict() # rename a field mappings['subject_id'] = 'id' # translate a field mappings['gender'] = 'sex', {'male': 'M', 'female': 'F'} # apply a calculation to a field mappings['age_months'] = 'age', lambda v: v * 12 # apply a calculation to a combination of fields mappings['bmi'] = lambda rec: rec['weight'] / rec['height']**2 # transform and inspect the output table2 = etl.fieldmap(table1, mappings) table2 # rowmap() ########## import petl as etl table1 = [['id', 'sex', 'age', 'height', 'weight'], [1, 'male', 16, 1.45, 62.0], [2, 'female', 19, 1.34, 55.4], [3, 'female', 17, 1.78, 74.4], [4, 'male', 21, 1.33, 45.2], [5, '-', 25, 1.65, 51.9]] def rowmapper(row):
def transform(dispensary_details, pricing, organization_id, debug, source_db): """ Load the transformed data into the destination(s) """ # source data table general_settings = utils.view_to_list(dispensary_details) pricing_detail = utils.view_to_list(pricing) dispensary_cut_data = [ 'id', 'dispensary_id', 'menu_show_tax', 'logo_file_name', 'inactivity_logout', 'calculate_even_totals', 'require_customer_referrer', 'membership_fee_enabled', 'pp_enabled', 'pp_global_dollars_to_points', 'pp_global_points_to_dollars', 'pp_points_per_referral', 'allow_unpaid_visits', 'red_flags_enabled', 'mmjrevu_api_key' ] pricing_cut_data = [ 'id', 'price_half_gram', 'price_gram', 'price_two_gram', 'price_eigth', 'price_quarter', 'price_half', 'price_ounce' ] dispensary_settings_data = etl.cut(general_settings, dispensary_cut_data) pricing_data = etl.cut(pricing_detail, pricing_cut_data) settings = (etl.addfield(dispensary_settings_data, 'organizationId')) mappings = OrderedDict() mappings['id'] = 'id' # field renames mappings['organizationId'] = organization_id settings_fields = etl.fieldmap(settings, mappings) merged_settings = ( etl.merge(settings, settings_fields, key='id').rename({ # Global -> General -> SESSION TIMEOUT DURATION 'inactivity_logout': 'sessionTimeoutDuration', # Global -> Logo 'logo_file_name': 'image', # Global -> Members -> Membership Level 'membership_fee_enabled': 'membershipLevelsEnabled', 'pp_global_dollars_to_points': 'dollarsPerPoint', 'pp_global_points_to_dollars': 'pointsPerDollar', 'pp_points_per_referral': 'referralPoints', # <Location> -> Sales -> TAXES IN 'menu_show_tax': 'enableTaxesIn', # <Location> -> Sales -> PRICE ROUNDING 'calculate_even_totals': 'hasPriceRounding', # <Location> -> Members -> REFERRER REQUIRED 'require_customer_referrer': 'mandatoryReferral', # <Location> -> Members -> PAID VISITS 'allow_unpaid_visits': 'paidVisitsEnabled', # <Location> -> Members -> MEDICAL MEMBERS 'red_flags_enabled': 'hasLimits', # <Location> -> General -> STORE LOCATIONS 'mmjrevu_api_key': 'apiKey' })) settings = {} for item in etl.dicts(merged_settings): item['keys'] = { 'dispensary_id': item['dispensary_id'], 'id': item['id'] } # if not item['sessionTimeoutDuration'] >= 30: # del item['sessionTimeoutDuration'] url = None if debug and item['image'] is not None: url = ("https://wm-mmjmenu-images-development.s3." "amazonaws.com/logos/{0}/original/" "{1}").format(item['id'], item['image']) elif item['image'] is not None: url = ("https://wm-mmjmenu-images-production.s3." "amazonaws.com/logos/{0}/original/" "{1}").format(item['id'], item['image']) item['image'] = url # remove any item['keys'] tuples with None values for key in item['keys'].keys(): if not item['keys'][key]: del item['keys'][key] """ Member settings nested - crm.member.settings """ if item['pp_enabled']: item['crm_member_settings'] = {} item['crm_member_settings']['membershipLevel'] = { 'membershipLevelsEnabled': \ utils.true_or_false(item['membershipLevelsEnabled']), 'levelName': 'Unnamed', 'dollarsPerPoint': item['dollarsPerPoint'], 'pointsPerDollar': item['pointsPerDollar'], 'referralPoints': item['referralPoints'] } """ Location settings nested. """ if item['apiKey']: item['location_specific'] = {'apiKey': item['apiKey']} else: item['location_specific'] = {} item['location_specific']['members'] = { 'paidVisitsEnabled': utils.true_or_false(item['paidVisitsEnabled']), 'mandatoryReferral': utils.true_or_false(item['mandatoryReferral']) } item['location_specific']['sales'] = { 'enableTaxesIn': utils.true_or_false(item['enableTaxesIn']), 'hasPriceRounding': utils.true_or_false(item['hasPriceRounding']) } # sales.settings.taxes item['sales_settings_taxes'] = {} for tax in _get_taxes(item['dispensary_id'], source_db): item['sales_settings_taxes']['taxes'] = { 'code': tax['name'], 'percent': tax['amount'] / 100, 'type': 'sales' } for pricing in etl.dicts(pricing_data): item['location_specific']['inventory'] = {} item['location_specific']['inventory']['weightPricing'] = { 'name': 'Default', 'defaultTier': True } item['location_specific']['inventory']['weightPricing'][ 'breakpoints'] = { 'price_half_gram': utils.dollars_to_cents(pricing['price_half_gram']), 'price_gram': utils.dollars_to_cents(pricing['price_gram']), 'price_two_gram': utils.dollars_to_cents(pricing['price_two_gram']), 'price_eighth': utils.dollars_to_cents(pricing['price_eigth']), 'price_quarter': utils.dollars_to_cents(pricing['price_quarter']), 'price_half': utils.dollars_to_cents(pricing['price_half']), 'price_ounce': utils.dollars_to_cents(pricing['price_ounce']), } # monthly purchase limit is two week limit x2 if item['hasLimits'] == 1: for limits in _medical_limits(item['dispensary_id'], source_db): item['location_specific']['members']['medicalLimits'] = { 'hasLimits': True, 'dailyPurchaseLimit': int(limits['daily_purchase_limit']), 'visitPurchaseLimit': int(limits['visit_purchase_limit']), 'dailyVisitLimit': int(limits['daily_visit_limit']), 'monthlyPurchaseLimit': \ int(limits['two_week_purchase_limit'] * 2) } if item['image'] is None or item['apiKey'] is None: del item['image'] del item['apiKey'] # delete fk's del item['id'] del item['dispensary_id'] del item['membershipLevelsEnabled'] del item['enableTaxesIn'] del item['hasLimits'] del item['hasPriceRounding'] del item['dollarsPerPoint'] del item['mandatoryReferral'] del item['paidVisitsEnabled'] del item['pointsPerDollar'] del item['pp_enabled'] del item['referralPoints'] # set up final structure for API settings.update(item) if debug: result = json.dumps(settings, sort_keys=True, indent=4, default=utils.json_serial) print(result) return settings
mappings['uid'] = 'user_id' mappings['utm_medium'] = 'utm_medium' mappings['utm_campaign'] = 'utm_campaign', {'audio': 'none', 'social': 'none'} mappings['utm_campaign_type'] = 'utm_campaign' mappings['email'] = 'email' mappings['subscription'] = 'type' mappings['sub_order'] = 'type', { 'Signup Completed': '1', 'Trial Started': '2', 'Subscription Started': '3', 'Subscription Ended': '4' } mappings['created_at'] = 'created_at' # Mapping stage_mapping = etl.fieldmap(stage_m_s, mappings) # Sort stage_mapping_ordered = etl.sort(stage_mapping, key=['created_at', 'email', 'sub_order']) # Datetime split t1 = etl.split(stage_mapping_ordered, 'created_at', 'T', ['date', 'time'], include_original=True) t2 = etl.split(t1, 'date', '-', ['year', 'month', 'day']) stage_ready = etl.split(t2, 'time', ':', ['hour', 'minute', 'second']) # Export as csv to load folder etl.tocsv(stage_ready, 'load/facts.csv')
import petl as etl import re from collections import OrderedDict import pymysql import dbconfig read_db_conn = pymysql.connect(host=dbconfig.db_host, port=dbconfig.db_port, charset="utf8", user=dbconfig.db_user, password=dbconfig.db_pass, db=dbconfig.db_name) products = etl.fromdb(read_db_conn, "SELECT id,name,description FROM {} limit 5000".format(dbconfig.db_table_items)) # remove non-alphanumeric character def cleanString(val): nonewline = val.replace('\n'," ") return re.sub(r'\W+', ' ', nonewline).lower() mappings = OrderedDict() mappings['id'] = 'id' mappings['item_description'] = lambda val : cleanString(val['name'] + " " +val['description']) products = etl.fieldmap(products, mappings) etl.tocsv(products, 'query_result.csv')