def run(source_file_name, output_file_name):
    """Transform a raw opportunity CSV into a Salesforce-style opportunity dataset.

    Reads `source_file_name`, renames/derives columns via a DataGenerator
    transformation pipeline, assigns owners and synthetic accounts, and writes
    the selected columns to `output_file_name`.

    Parameters
    ----------
    source_file_name : str
        Path of the input CSV to load.
    output_file_name : str
        Path of the CSV file to write.
    """
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    # Rename raw source columns to their Salesforce API field names.
    rename_map = {
        'Supplies Group': 'Product2Family__c',
        'Region': 'Region__c',
        'Route To Market': 'LeadSource',
        'Elapsed Days In Sales Stage': 'TimeToClose__c',
        'Sales Stage Change Count': 'SalesStageCount__c',
        'Opportunity Amount USD': 'Amount',
        'Deal Size Category': 'DealSizeCategory__c'
    }
    data_gen.rename_columns(rename_map)

    # multiple time to close by 2
    data_gen.add_formula_column('TimeToClose__c',
                                lambda cv: int(cv['TimeToClose__c']) * 2)

    # map existing columns to new columns (lookup tables live in `definitions`)
    data_gen.add_map_column('Competitor__c', 'Competitor Type',
                            definitions.competitor_type)
    data_gen.add_map_column('Product2Name__c', 'Supplies Subgroup',
                            definitions.supplies_subgroup_map)
    data_gen.add_map_column('AccountAnnualRevenue__c', 'Client Size By Revenue',
                            definitions.client_size_rev)
    data_gen.add_map_column('AccountNumberOfEmployees__c',
                            'Client Size By Employee Count',
                            definitions.client_size_employees)
    data_gen.add_map_column('AccountBookings__c',
                            'Revenue From Client Past Two Years',
                            definitions.client_past_revenue)
    data_gen.add_map_column('IsWon', 'Opportunity Result', definitions.isWon)

    # generate external id (sequential, based on the generator's current row)
    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Opportunity.' + str(data_gen.current_row + 1))

    # Randomly flag whether an exec meeting / interactive demo happened.
    data_gen.add_formula_column(
        'Exec_Meeting__c', lambda: choice(['true', 'false'], p=[.35, .65]))

    data_gen.add_formula_column(
        'Interactive_Demo__c', lambda: choice(['true', 'false'], p=[.30, .70]))

    def ttc_formula(column_values):
        # Adjust time-to-close based on exec meeting, competitor, demo and
        # account revenue signals; clamp the result at 0.
        ttc = int(column_values['TimeToClose__c'])
        exec_meeting = column_values['Exec_Meeting__c']
        competitor_type = column_values['Competitor Type']
        demo = column_values['Interactive_Demo__c']
        rev = column_values['AccountAnnualRevenue__c']
        if ttc == 0:
            return 0
        if exec_meeting == 'true':
            if competitor_type == 'None':
                ttc = ttc + 4
            else:
                ttc = ttc - 6
        if demo == 'true':
            if rev == 'T100':
                ttc = ttc + 6
            else:
                ttc = ttc - 5
        if ttc < 0:
            return 0
        return ttc

    data_gen.add_formula_column('TimeToClose__c', formula=ttc_formula)

    # All generated opportunities are closed.
    data_gen.add_constant_column('IsClosed', 'true')

    data_gen.add_formula_column(
        'RecordType.DeveloperName',
        formula=lambda: choice(['SimpleOpportunity', 'ChannelPartner'],
                               p=[.70, .30]))

    # generate opportunity type
    types = [
        'Add-On Business', 'Existing Business', 'New Business',
        'New Business / Add-on'
    ]
    data_gen.add_formula_column(
        'Type', formula=lambda: choice(types, p=[0.1, 0.3, 0.5, 0.1]))

    # generate a close date year and quarter
    # (year is an offset 0-29; quarters are weighted, Q4 heaviest)
    data_gen.add_formula_column('close_date_year',
                                formula=lambda: choice(list(range(0, 30))))
    data_gen.add_formula_column(
        'close_date_quarter',
        formula=lambda: choice([1, 2, 3, 4], p=[0.21, 0.24, 0.22, 0.33]))

    # generate a close date offset from the year and quarter
    def offset_formula(column_values):
        # chi-square noise spreads close dates within the quarter
        day = int(round(chisquare(9) * 5))
        offset = 365 * (column_values['close_date_year']) + 91 * (
            column_values['close_date_quarter'] - 1) + day
        return offset

    data_gen.add_formula_column('close_date_offset__c', offset_formula)

    # generate a close date
    def close_date_formula(column_values):
        last_day = date(date.today().year, 12, 31)
        offset = column_values['close_date_offset__c']
        # last day of current year - offset
        close_date = last_day - timedelta(days=int(offset))
        return str(close_date)

    data_gen.add_formula_column('CloseDate', close_date_formula)

    # generate a create date (close date minus time-to-close days)
    def create_date_formula(column_values):
        close_date = dateutil.parser.parse(column_values['CloseDate'])
        offset = column_values['TimeToClose__c']
        create_date = close_date - timedelta(days=int(offset))
        return create_date.isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    # generate last activity date: a random date between create and close,
    # with both ends capped at the current datetime.
    def last_activity_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        close_date = dateutil.parser.parse(column_values['CloseDate'])
        if close_date > today_datetime:
            close_date = today_datetime
        if create_date > today_datetime:
            create_date = today_datetime
        return fake.date_time_between_dates(create_date, close_date).date()

    data_gen.add_formula_column('LastActivityDate__c',
                                formula=last_activity_date_formula)

    # generate StageName, ForecastCategory, and Probability
    # (the `None` key acts as the default for any non-'Won' result)
    data_gen.add_map_column('StageName',
                            'Opportunity Result',
                            value_map={
                                'Won': 'Closed Won',
                                None: 'Closed Lost'
                            })
    data_gen.add_map_column('ForecastCategory',
                            'Opportunity Result',
                            value_map={
                                'Won': 'Closed',
                                None: 'Omitted'
                            })
    data_gen.add_map_column('ForecastCategoryName',
                            'Opportunity Result',
                            value_map={
                                'Won': 'Closed',
                                None: 'Omitted'
                            })
    data_gen.add_map_column('Probability',
                            'Opportunity Result',
                            value_map={
                                'Won': '100',
                                None: '0'
                            })

    # randomly pick an owner from the same region
    # (each region maps to a pool of sales-user external ids)
    region_territory_map = {
        'Pacific': lambda: 'W_Sales_User.' + str(choice([1, 2, 3, 4, 5, 6])),
        "Northwest": lambda: 'W_Sales_User.' + str(choice([1, 2, 3, 4, 5, 6])),
        "Midwest": lambda: 'W_Sales_User.' + str(choice([7, 8, 9, 10, 11])),
        "Southwest": lambda: 'W_Sales_User.' + str(choice([7, 8, 9, 10, 11])),
        "Mid-Atlantic":
        lambda: 'W_Sales_User.' + str(choice([7, 8, 9, 10, 11])),
        "Northeast":
        lambda: 'W_Sales_User.' + str(choice([12, 13, 14, 15, 16, 17])),
        "Southeast":
        lambda: 'W_Sales_User.' + str(choice([12, 13, 14, 15, 16, 17]))
    }
    data_gen.add_map_column('Owner.External_Id__c', 'Region__c',
                            region_territory_map)

    # build out helper column for account selection
    # (a composite "segment" key so similar clients share accounts)
    def account_cat_formula(column_values):
        x1 = column_values['Client Size By Revenue']
        x2 = column_values['Client Size By Employee Count']
        x3 = column_values['Revenue From Client Past Two Years']
        return str(x1) + '.' + str(x2) + '.' + str(x3)

    data_gen.add_formula_column('account_cat', account_cat_formula)

    # apply pending transformations now so we can sort by account_cat
    data_gen.apply_transformations()
    data_gen.sort_by('account_cat')

    # helper dataset used for account selection
    # (mutable running state shared across account_id_formula calls)
    data_gen.add_dataset('account_segment', {
        'account_id': 0,
        'account_count': 0,
        'current_account_cat': None
    })

    # generate a distribution of account ids
    def account_id_formula(column_values):
        account_segment = data_gen.datasets['account_segment']
        account_id = account_segment['account_id']
        account_count = account_segment['account_count']
        current_account_cat = account_segment['current_account_cat']
        if column_values[
                'account_cat'] == current_account_cat and account_count > 0:
            # continue with the current account_id if there are still any to take
            # but first decrement account count
            account_count += -1
            account_segment['account_count'] = account_count
            return account_id
        else:
            # use new account id
            account_id += 1
            # generate a random number of opportunties to associate to an account
            account_count = int(round(lognormal(1))) + randint(1, 7)
            current_account_cat = column_values['account_cat']
            # update account segment dataset for next iteration
            account_count += -1
            account_segment['account_id'] = account_id
            account_segment['account_count'] = account_count
            account_segment['current_account_cat'] = current_account_cat
            return account_id

    data_gen.add_formula_column('AccountId__c', account_id_formula)

    # generate account id string
    data_gen.add_formula_column(
        'AccountExternalId__c',
        formula=lambda cv: 'W_Account.' + str(cv['AccountId__c']))

    # generate account name string
    # (memoized per account id so every row for an account shares one name)
    account_names = {}

    def account_name_formula(column_values):
        account_id = column_values['AccountId__c']
        if account_id in account_names:
            return account_names[account_id]
        else:
            account_name = account.account_name()
            account_names[account_id] = account_name
            return account_name

    data_gen.add_formula_column('AccountName__c', formula=account_name_formula)

    # generate name
    def name_formula(column_values):
        account_name = column_values['AccountName__c']
        # NOTE(review): amount and product_2_name are read but unused in the
        # returned value — possibly leftovers from an earlier naming scheme.
        amount = column_values['Amount']
        product_2_name = column_values['Product2Name__c']
        return account_name + ' ' + str(data_gen.current_row % 256)

    data_gen.add_formula_column('Name', name_formula)

    # apply remaining transformations
    data_gen.apply_transformations()

    # sort by account id
    data_gen.sort_by('AccountId__c')

    # Final output column order for the generated CSV.
    columns_to_write = [
        'External_Id__c', 'Product2Name__c', 'Product2Family__c', 'Region__c',
        'LeadSource', 'TimeToClose__c', 'SalesStageCount__c', 'Amount',
        'AccountAnnualRevenue__c', 'AccountNumberOfEmployees__c',
        'AccountBookings__c', 'Competitor__c', 'DealSizeCategory__c',
        'AccountExternalId__c', 'AccountName__c', 'close_date_year',
        'close_date_quarter', 'close_date_offset__c', 'Exec_Meeting__c',
        'Interactive_Demo__c', 'IsWon', 'IsClosed', 'Owner.External_Id__c',
        'Name', 'Type', 'StageName', 'ForecastCategory',
        'ForecastCategoryName', 'Probability', 'RecordType.DeveloperName'
    ]

    data_gen.write(output_file_name, columns_to_write)
def run(source_file_name, output_file_name):
    """Transform a raw retail/service CSV into a Salesforce-style case dataset.

    Filters profit outliers, derives tier/channel/support/priority/SLA fields
    via mapping tables, synthesizes owners, accounts and CSAT scores, and
    writes the selected columns to `output_file_name`.

    Parameters
    ----------
    source_file_name : str
        Path of the input CSV to load.
    output_file_name : str
        Path of the CSV file to write.
    """
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    # find mean and std of profit
    profits = []
    for row in data_gen.rows:
        column_values = data_gen.row_to_column_values(row)
        profits.append(float(column_values['Profit']))
    profit_mean = mean(profits)
    profit_std = std(profits)

    # filter out profits more than 2 std out.
    def filter_func(column_values):
        profit = float(column_values['Profit'])
        z_score = abs((profit - profit_mean) / profit_std)
        return z_score <= 2

    data_gen.filter(filter_function=filter_func)

    # Map each store to its tier.
    store_tier_map = {
        'New York 4': "Tier 1",
        'New York 3': "Tier 1",
        'New York 2': "Tier 1",
        'New York 1': "Tier 1",
        'Chicago 3': "Tier 1",
        'Chicago 2': "Tier 2",
        'Chicago 1': "Tier 2",
        'Boston 2': "Tier 2",
        'Boston 1': "Tier 3"
    }
    data_gen.add_map_column('Tier', 'Store', store_tier_map)

    # Derive the case origin channel from the month.
    month_channel_map = {
        'January': 'Chat',
        'February': 'Chat',
        'March': 'Chat',
        'April': 'Chat',
        'May': 'Chat',
        'June': 'Email',
        'July': 'Email',
        'August': 'Facebook',
        'September': 'Phone',
        'October': 'Phone',
        'November': 'Website',
        'December': 'Website'
    }
    data_gen.add_map_column('Origin', 'Month', month_channel_map)

    # Derive support level from the discount bucket.
    discount_support_map = {
        '0': 'Free',
        '0.05': 'Free',
        '0.15': 'Basic',
        '0.1': 'Silver',
        '0.2': 'Platinum'
    }
    data_gen.add_map_column('Type_of_Support__c', 'Discount',
                            discount_support_map)

    # Derive the case reason from the marketing campaign.
    camp_reason_map = {
        "Bundled": "Documentation",
        "Buy More & Save": "Unknown Failure",
        "Competitor Focus": "Feature Question",
        "Door Buster": "Hardware Question",
        "Friends & Family": "Late Delivery",
        "Local": "Software Question",
        "Paper Circular": "General Question",
        "Regional": "Item Damaged",
        "Social": "Item Damaged"
    }
    data_gen.add_map_column('Reason', 'Marketing Campaign', camp_reason_map)

    # Derive priority from the city.
    city_priority_map = {
        "Boston": "Low",
        "Chicago": "Medium",
        "New York": "High"
    }
    data_gen.add_map_column('Priority', 'City', city_priority_map)

    # Derive SLA state from the competition level.
    comp_sla_map = {
        "High": "Violation",
        "Normal": "Compliant",
        "Low": "Compliant"
    }
    data_gen.add_map_column('SLA', 'Competition', comp_sla_map)

    # All generated cases are closed.
    data_gen.add_constant_column('Status', 'Closed')

    # Compliant cases are much more likely to close on first contact.
    sla_first_contact_close_map = {
        'Compliant': lambda: choice(['true', 'false'], p=[.9, .1]),
        'Violation': lambda: choice(['true', 'false'], p=[.7, .3])
    }
    data_gen.add_map_column('First_Contact_Close__c', 'SLA',
                            sla_first_contact_close_map)

    # Violations stay open longer (hours), weighted toward the low end.
    sla_time_open_map = {
        'Compliant':
        lambda: choice([12, 24, 36, 48], p=[.50, .20, .20, .10]),
        'Violation':
        lambda: choice([60, 72, 84, 96, 108, 120],
                       p=[.60, .20, .10, .05, .03, .02])
    }
    data_gen.add_map_column('Time_Open__c', 'SLA', sla_time_open_map)

    def region_formula(column_values):
        # Assign the CSR team by customer average-age band.
        average_age = float(column_values['Average Age'])
        if average_age < 40:
            return 'West CSR'
        elif average_age >= 40.0 and average_age < 50:
            return 'Central CSR'
        else:
            return 'East CSR'

    data_gen.add_formula_column('Team__c', region_formula)

    def user_formula(column_values):
        # Pick an owner from the user pool matching the same age band as the
        # team assignment above.
        average_age = float(column_values['Average Age'])
        if average_age < 40:
            return 'W_Services_User.' + str(choice([1, 2, 3, 4, 5]))
        elif average_age >= 40.0 and average_age < 50:
            return 'W_Services_User.' + str(choice([6, 7, 8, 9, 10, 11]))
        else:
            return 'W_Services_User.' + str(choice([12, 13, 14, 15, 16, 17]))

    data_gen.add_formula_column('Owner.External_Id__c', user_formula)

    # generate offer voucher - give vouchers to customers that were unhappy
    # with Video Games or Cables to boost CSAT
    def offer_voucher_formula(column_values):
        # 'Profit Linear' is used as a 0-100 CSAT proxy here
        csat = float(column_values['Profit Linear'])
        item = column_values['Item']
        if item in ['Video Games', 'Cables']:
            return choice(['true', 'false'],
                          p=[csat / 100, (100 - csat) / 100])
        else:
            return 'false'

    data_gen.add_formula_column('Offer_Voucher__c', offer_voucher_formula)

    def send_field_service_formula(column_values):
        # Always dispatch field service for happy Tablet customers; otherwise
        # dispatch at random.
        csat = float(column_values['Profit Linear'])
        item = column_values['Item']
        if csat >= 80.0 and item == 'Tablet':
            return 'true'
        else:
            return choice(['true', 'false'], p=[.25, .75])

    data_gen.add_formula_column('Send_FieldService__c',
                                send_field_service_formula)

    # Tier 1 cases are escalated; everything else (None = default) is not.
    data_gen.add_map_column('IsEscalated', 'Tier', {
        'Tier 1': 'true',
        None: 'false'
    })

    # generate close date offset
    # random offset covering the last 14 months
    data_gen.add_formula_column('close_date_offset',
                                lambda: randint(1, 30 * 14))

    # generate account id - generate a long tail distribution - cubic function +- randint
    # helper dataset used for account selection
    # (mutable running state shared across account_id_formula calls)
    data_gen.add_dataset('current_account', {'account_id': 0, 'account_count': 0})

    # generate a distribution of account ids
    def account_id_formula(column_values):
        current_account = data_gen.datasets['current_account']
        account_id = current_account['account_id']
        account_count = current_account['account_count']
        if account_count > 0:
            # continue with the current account_id if there are still any to take
            # but first decrement account count
            account_count += -1
            current_account['account_count'] = account_count
        else:
            # use new account id
            account_id += 1
            account_count = int(round(lognormal(1))) + randint(1, 7)
            # update account dataset for next iteration
            account_count += -1
            current_account['account_count'] = account_count
            current_account['account_id'] = account_id
        return 'W_Services_Account.' + str(account_id)

    data_gen.add_formula_column('Account.External_Id__c', account_id_formula)

    def csat_formula(column_values):
        # first normalize csat between 30-90
        csat = float(column_values['Profit Linear'])
        new_delta = 70
        csat = (new_delta * csat / 100) + 30
        channel = column_values['Origin']
        is_escalated = column_values['IsEscalated']
        send_field_service = column_values['Send_FieldService__c']
        offer_voucher = column_values['Offer_Voucher__c']
        # Phone interactions reduce CSAT; other channels improve it.
        if is_escalated == 'true':
            if channel == 'Phone':
                csat = csat - 2
            else:
                csat = csat + 2
        if send_field_service == 'true':
            if channel == 'Phone':
                csat = csat - 2
            else:
                csat = csat + 4
        if offer_voucher == 'true':
            if channel == 'Phone':
                csat = csat - 2
            else:
                csat = csat + 4
        return csat

    data_gen.add_formula_column('CSAT__c', formula=csat_formula)

    # Normalize the Outlier flag to lowercase 'true'/'false'.
    data_gen.add_map_column('Outlier',
                            'Outlier',
                            value_map={
                                'TRUE': 'true',
                                None: 'false'
                            })

    data_gen.apply_transformations()

    # Zero out Time_Open__c for first-contact closes; second pass is needed
    # because the map reads the already-materialized Time_Open__c value.
    data_gen.add_map_column('Time_Open__c',
                            'First_Contact_Close__c',
                            value_map={
                                'true': 0,
                                None: lambda cv: cv['Time_Open__c']
                            })

    data_gen.apply_transformations()

    rename_map = {
        'Item': 'Product_Family_KB__c'
    }
    data_gen.rename_columns(rename_map)

    # Final output column order for the generated CSV.
    output_columns = [
        'Origin', 'Store', 'Tier', 'Product_Family_KB__c', 'Priority',
        'Average Age', 'Percent Male', 'SLA', 'Daily Revenue', 'Reason',
        'Reg Price', 'Type_of_Support__c', 'Price', 'Quantity', 'Cost',
        'Profit', 'CSAT__c', 'Profit Log', 'Outlier', 'Status',
        'First_Contact_Close__c', 'Time_Open__c', 'Team__c',
        'Owner.External_Id__c', 'close_date_offset', 'Account.External_Id__c',
        'Offer_Voucher__c', 'Send_FieldService__c', 'IsEscalated'
    ]

    data_gen.write(output_file_name, output_columns)
# NOTE(review): the tmp_folder default `today.isoformat()` is evaluated once at
# import time, not per call — confirm that is the intended behavior.
def generate_status_file(source_file,
                         original_status_file,
                         tmp_folder=today.isoformat(),
                         file_name=''):
    """Takes a CSV file and generates another file containing mappings of
    External_Ids and intermediate and final statuses for the records.
    Some MFG objects require intermediate status.

    Parameters
    ----------
    source_file : str
        The name of the file (including the path) to be processed.
        File must have Id, External_Id__c and Status.
    original_status_file : str
        The file name (including path) of the CSV file containing the
        original Id and Status.
    tmp_folder : str
        Name of the folder for archive path. Default = today's date.
    file_name : str
        Name of Status file.

    Returns
    -------
    None
        Generates file(s) with corresponding statuses. Note that for
        SalesAgreements multiple files are generated and the name of these
        files will depend on the intermediate status name.
    """

    def get_original_final_status(status_by_id, record_id):
        # First grouped row for the record id carries the original status.
        return status_by_id[record_id][0].get('Status')

    data_gen = DataGenerator()
    data_gen.load_source_file(source_file, ['Id', 'External_Id__c', 'Status'])

    # Index the original statuses by record Id for O(1) lookup below.
    object_status = data_gen.load_dataset('object_status',
                                          original_status_file,
                                          ['Id', 'Status'])
    status_by_id = object_status.group_by('Id')

    ## Order and Contract records can go from DRAFT directly to their final status
    ## That is why only one file containing all final statuses is needed
    ## Note: For reference, an ALL status file, containing SAs and their final status, is created for SalesAgreements
    # Rebuild every row with its original final status. Rows are pop()ed off
    # the end (reversing order), so reverse() afterwards restores it.
    new_rows = []
    row_count = len(data_gen.rows)
    for r in range(row_count):
        row = data_gen.rows.pop()
        column_values = data_gen.row_to_column_values(row)
        column_values['Status'] = get_original_final_status(
            status_by_id, column_values['Id'])
        new_rows.append(data_gen.column_values_to_row(column_values))
    data_gen.rows = new_rows
    data_gen.reverse()

    # write file containing all statuses
    status_file = definitions.mfg_temporal_path.format(tmp_folder) + file_name
    data_gen.write(status_file, columns=['External_Id__c', 'Id', 'Status'])

    # clear objects
    new_rows.clear()
    status_by_id.clear()
    data_gen.remove_dataset('object_status')

    if file_name == 'SalesAgreement.status.ALL.csv':
        ## SalesAgreements require to be updated in steps up to their final status
        ## 1> Draft (already inserted)
        ## 2> Approved (all records but the ones with final_status=Draft must be updated to Approved)
        ## 3> Discarded (only records with final status as Discarded)
        ## 4> Cancelled (only records with final status as Cancelled)
        ## 5> Expired (only records with final status as Expired)
        ## Note: Some status like Activated are automatically handled in the Org once a record is Approved
        data_gen.load_source_file(status_file)
        # Keep a pristine copy of all rows; each per-status pass below
        # restores from it before filtering.
        data_gen.add_dataset('sa_all_status', copy.deepcopy(data_gen.rows))
        for status_name in ['Approved', 'Discarded', 'Cancelled', 'Expired']:
            # e.g. <mfg_temporal_path>/SalesAgreement.status.APPROVED.csv
            status_file = definitions.mfg_temporal_path.format(
                tmp_folder) + 'SalesAgreement.status.' + status_name.upper(
                ) + '.csv'
            if status_name == 'Approved':
                # The Approved pass overwrites Status on ALL rows.
                data_gen.add_constant_column('Status', 'Approved')
                data_gen.apply_transformations()
            else:
                # Later passes restore the full row set, then keep only the
                # records whose final status matches this pass.
                data_gen.rows = copy.deepcopy(
                    data_gen.datasets['sa_all_status'])
                data_gen.filter(lambda cv: cv['Status'] == status_name)
            if data_gen.row_count:
                data_gen.write(status_file,
                               columns=['External_Id__c', 'Status'])
            else:
                print("No records for {}".format(status_file))