def generate(self, selected_filters=None, columns=None, count=5):
    """Generate ``count`` rows of fake person data and return them as a list
    of column-value collections restricted to ``columns``.

    selected_filters may contain a 'gender' key ('male' or anything else)
    that pins the Gender column; otherwise gender is randomly generated.
    """
    filters = {} if selected_filters is None else selected_filters
    wanted_columns = self.get_columns() if columns is None else columns

    gen = DataGenerator()
    gen.row_count = count

    if 'gender' in filters:
        # explicit filter pins every row to one gender
        fixed_gender = 'Male' if filters['gender'] == 'male' else 'Female'
        gen.add_constant_column('Gender', fixed_gender)
    else:
        gen.add_formula_column('Gender', formula=fake.gender)

    def pick_first_name(cv):
        # first name must agree with the row's generated gender
        if cv['Gender'] == 'Male':
            return fake.first_name_male()
        return fake.first_name_female()

    gen.add_formula_column('First Name', pick_first_name)
    gen.add_formula_column('Last Name', formula=fake.last_name)
    gen.add_formula_column('Name', lambda cv: cv['First Name'] + ' ' + cv['Last Name'])

    gen.apply_transformations()
    return [gen.row_to_column_values(row, wanted_columns).values() for row in gen.rows]
def run(batch_id, source_file_name, output_file_name):
    """Generate KCS article-version rows from the article source file and
    write them to ``output_file_name``, tagged with ``batch_id``."""
    gen = DataGenerator()

    # load source file
    gen.load_source_file(source_file_name, [
        'KnowledgeArticle.External_Id__c',
        'User.External_Id__c',
        'CreatedDate__c',
    ])

    gen.rename_column('KnowledgeArticle.External_Id__c', 'KCSArticle__ka.External_Id__c')
    gen.rename_column('User.External_Id__c', 'Owner.External_Id__c')

    gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_KCSArticleVersion.' + str(gen.current_row + 1))
    gen.add_formula_column('ArticleNumber__c', lambda: gen.current_row + 1)
    gen.add_formula_column('PublishStatus__c', ['Archived', 'Online'])

    # every generated version is the latest and visible on all channels
    for flag_column in ('IsLatestVersion__c', 'IsVisibleInApp__c',
                        'IsVisibleInCsp__c', 'IsVisibleInPkb__c',
                        'IsVisibleInPrm__c'):
        gen.add_constant_column(flag_column, 'true')

    gen.add_constant_column('VersionNumber__c', '1')
    gen.add_constant_column('Language__c', 'en_US')

    gen.add_formula_column('Title__c', [
        "Health", "Computers", "Music", "Tools", "Home", "Outdoors",
        "Jewelery", "Toys", "Grocery", "Clothing", "Games", "Automotive",
        "Beauty", "Garden", "Books", "Industrial", "Baby", "Kids", "Movies",
        "Sports", "Shoes", "Electronics",
    ])

    # add a UUID for each row that is created in this batch
    gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    gen.apply_transformations()
    gen.write(output_file_name, [
        'External_Id__c', 'ArticleNumber__c', 'CreatedDate__c',
        'Owner.External_Id__c', 'PublishStatus__c', 'IsLatestVersion__c',
        'IsVisibleInApp__c', 'IsVisibleInCsp__c', 'IsVisibleInPkb__c',
        'IsVisibleInPrm__c', 'KCSArticle__ka.External_Id__c', 'Title__c',
        'VersionNumber__c', 'Language__c', 'analyticsdemo_batch_id__c',
    ])
def run(batch_id, source_file_name, output_file_name):
    """Generate CaseArticle link rows (one per ~25% of cases) from the case
    source file and write them to ``output_file_name``."""
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c',
        'Owner.External_Id__c',
        'CreatedDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    data_gen.rename_column('Owner.External_Id__c', 'User.External_Id__c')

    # todo one case article per case? at most 1? distribution?
    # 25% of cases get exactly one linked article, the rest none
    data_gen.duplicate_rows(
        duplication_factor=lambda: choice([0, 1], p=[.75, .25]))

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_CaseArticle.' + str(data_gen.current_row + 1))
    data_gen.add_formula_column(
        'KnowledgeArticle.External_Id__c',
        formula=lambda: 'W_KCSArticle.' + str(data_gen.current_row + 1))
    data_gen.add_constant_column('ArticleVersionNumber__c', 1)

    # BUG FIX: a list of per-row choices must go through add_formula_column
    # (as done for PublishStatus__c elsewhere in this project);
    # add_constant_column would stamp the literal list into every row.
    data_gen.add_formula_column('IsSharedByEmail__c', ['true', 'false'])

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c', 'User.External_Id__c', 'ArticleVersionNumber__c',
        'CreatedDate__c', 'KnowledgeArticle.External_Id__c',
        'IsSharedByEmail__c', 'Case.External_Id__c',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def run(batch_id, source_file_name, output_file_name, reference_date=today): data_gen = DataGenerator() # load source file source_columns = ['External_Id__c', 'Name', 'UserRole.Name'] data_gen.load_source_file(source_file_name, source_columns) data_gen.filter(lambda cv: 'RVP' not in cv['UserRole.Name']) data_gen.filter( lambda cv: 'CSM' not in cv['UserRole.Name']) # comes from Service data_gen.rename_column('External_Id__c', 'QuotaOwner_Id__c') data_gen.rename_column('Name', 'OwnerName__c') # generate id data_gen.add_formula_column( 'External_Id__c', formula=lambda: 'W_Quota.' + str(data_gen.current_row + 1)) data_gen.duplicate_rows(24) def quota_formula(): # first month of quarter = 300k # second month of quarter = 500k # third month of quarter = 500k quarter = data_gen.current_row % 3 if quarter == 0: return 300000 elif quarter == 1: return 750000 else: return 500000 data_gen.add_formula_column('QuotaAmount__c', quota_formula) current_year = reference_date.year last_year = current_year - 1 def start_date_formula(): user_row = data_gen.current_row % 24 month = str((user_row % 12) + 1).zfill(2) day = '01' if user_row < 12: year = str(last_year) else: year = str(current_year) return dateutil.parser.parse(year + '-' + month + '-' + day).date() data_gen.add_formula_column('StartDate__c', start_date_formula) # add a UUID for each row that is created in this batch data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id) # apply transformations and write file data_gen.apply_transformations() data_gen.write(output_file_name, [ 'External_Id__c', 'QuotaOwner_Id__c', 'OwnerName__c', 'StartDate__c', 'QuotaAmount__c' ])
def run(batch_id, source_file_name, output_file_name):
    """Generate per-article statistics (number, case associations, publish
    dates) from the KCS article source file."""
    gen = DataGenerator()

    # load source file
    gen.load_source_file(source_file_name,
                         ['KnowledgeArticle.External_Id__c', 'CreatedDate__c'])
    gen.rename_column('KnowledgeArticle.External_Id__c', 'External_Id__c')

    gen.add_formula_column('ArticleNumber__c', lambda: gen.current_row + 1)
    gen.add_formula_column('CaseAssociationCount__c', lambda: randint(1, 6))

    def first_publish(cv):
        # first published 1-10 days after creation
        created = dateutil.parser.parse(cv['CreatedDate__c'])
        return (created + timedelta(days=randint(1, 10))).isoformat(sep=' ')

    gen.add_formula_column('FirstPublishedDate__c', formula=first_publish)

    def last_publish(cv):
        # last published 1-10 days after the first publish date
        first_published = dateutil.parser.parse(cv['FirstPublishedDate__c'])
        return (first_published + timedelta(days=randint(1, 10))).isoformat(sep=' ')

    gen.add_formula_column('LastPublishedDate__c', formula=last_publish)

    # add a UUID for each row that is created in this batch
    gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    gen.apply_transformations()
    gen.write(output_file_name, [
        'ArticleNumber__c', 'External_Id__c', 'CaseAssociationCount__c',
        'CreatedDate__c', 'FirstPublishedDate__c', 'LastPublishedDate__c',
        'analyticsdemo_batch_id__c',
    ])
def run(batch_id, source_file_name, output_file_name):
    """Generate article view-statistic rows (channel, view count, score)
    from the KCS article source file."""
    gen = DataGenerator()

    # load source file
    gen.load_source_file(source_file_name, [
        'KnowledgeArticle.External_Id__c',
        'User.External_Id__c',
        'CreatedDate__c',
    ])

    gen.rename_column('KnowledgeArticle.External_Id__c', 'Parent.External_Id__c')
    gen.rename_column('User.External_Id__c', 'Owner.External_Id__c')

    gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_KCSArticle_ViewStat.' + str(gen.current_row + 1))

    gen.add_formula_column('Channel__c', ['App', 'Desktop Site', 'Mobile Site'])
    gen.add_formula_column('ViewCount__c', formula=lambda: randint(1, 100))
    gen.add_formula_column('NormalizedScore__c',
                           formula=lambda: round(uniform(1, 10), 3))

    # add a UUID for each row that is created in this batch
    gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    gen.apply_transformations()
    gen.write(output_file_name, [
        'External_Id__c', 'Channel__c', 'Parent.External_Id__c',
        'ViewCount__c', 'NormalizedScore__c', 'analyticsdemo_batch_id__c',
    ])
def run(batch_id, source_file_name, output_file_name):
    """Generate one Contact per Account row; the contact id is derived from
    the account id by swapping the 'W_Account' prefix for 'W_Contact'."""
    gen = DataGenerator()

    # load source file
    gen.load_source_file(source_file_name, ['External_Id__c'])
    gen.rename_column('External_Id__c', 'Account.External_Id__c')

    def contact_id(cv):
        return cv['Account.External_Id__c'].replace('W_Account', 'W_Contact')

    gen.add_formula_column('External_Id__c', contact_id)
    gen.add_formula_column('FirstName', formula=fake.first_name)
    gen.add_formula_column('LastName', formula=fake.last_name)

    # add a UUID for each row that is created in this batch
    gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    gen.apply_transformations()
    gen.write(output_file_name)
def run(batch_id, source_file_name, output_file_name, reference_datetime=today):
    """Generate 0-3 service Event rows per case from the case source file.

    Event creation times fall between the case's create date and its last
    activity date, clamped to reference_datetime (presumably a datetime —
    confirm against the module-level `today`).
    """
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c',
        'LastActivityDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')

    # 0-3 events per case
    data_gen.duplicate_rows(duplication_factor=lambda: randint(0, 3))

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Services_Event.' + str(data_gen.current_row + 1))
    data_gen.add_formula_column('Subject', formula=event.event_subject)
    data_gen.add_formula_column('EventSubtype', formula=event.event_subtype)
    data_gen.add_formula_column('DurationInMinutes',
                                formula=event.event_call_duration)

    def create_date_formula(column_values):
        # pick a random datetime between case creation and last activity,
        # never later than reference_datetime
        case_create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        # combine() pairs the last-activity day with the creation time-of-day
        case_close_date = datetime.combine(
            dateutil.parser.parse(column_values['LastActivityDate__c']),
            case_create_date.time())
        if case_close_date > reference_datetime:
            case_close_date = reference_datetime
        create_date = fake.date_time_between_dates(case_create_date,
                                                   case_close_date)
        if create_date > reference_datetime:
            create_date = reference_datetime
        return create_date.isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', create_date_formula)
    data_gen.add_copy_column('LastModifiedDate__c', 'CreatedDate__c')

    def activity_date_formula(column_values):
        # activity day: 0-14 days after the event's creation day
        create_date = dateutil.parser.parse(column_values['CreatedDate__c']).date()
        return (create_date + timedelta(days=randint(0, 14))).isoformat()

    data_gen.add_formula_column('ActivityDate', activity_date_formula)

    def activity_datetime_formula(column_values):
        # datetime form of ActivityDate (midnight)
        return dateutil.parser.parse(column_values['ActivityDate'])

    data_gen.add_formula_column('ActivityDateTime', activity_datetime_formula)

    data_gen.add_constant_column('ShowAs', 'Busy')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write
    data_gen.apply_transformations()
    output_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'Case.External_Id__c',
        'Subject', 'EventSubtype', 'DurationInMinutes', 'ShowAs',
        'CreatedDate__c', 'LastModifiedDate__c', 'ActivityDate',
        'ActivityDateTime', 'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def run(batch_id, source_file_name, output_file_name, reference_datetime=today):
    """Generate 1-5 live-chat transcript rows per chat-origin case.

    Only rows whose case Origin is 'Chat' are kept; output is sorted by
    chat start time.
    """
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c',
        'ClosedDate__c', 'Origin'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    # ClosedDate__c temporarily becomes EndTime__c so create_date_formula can
    # read it below; EndTime__c is then overwritten by end_time_formula.
    data_gen.rename_column('ClosedDate__c', 'EndTime__c')

    # 1-5 transcripts per case, heavily skewed to 1
    data_gen.duplicate_rows(duplication_factor=lambda: choice(
        [1, 2, 3, 4, 5], p=[.65, .15, .10, .05, .05]))

    data_gen.add_formula_column(
        'External_Id__c',
        lambda: 'W_LiveChatTranscript.' + str(data_gen.current_row + 1))

    # random chat metrics (seconds / counts)
    data_gen.add_formula_column('Abandoned__c', lambda: randint(1, 300))
    data_gen.add_formula_column('AverageResponseTimeOperator__c',
                                lambda: randint(1, 180))
    data_gen.add_formula_column('AverageResponseTimeVisitor__c',
                                lambda: randint(1, 180))
    data_gen.add_formula_column('Body__c', formula=fake.body)
    data_gen.add_formula_column('Browser__c', formula=fake.browser)
    data_gen.add_constant_column('BrowserLanguage__c', 'en_US')
    data_gen.add_formula_column('ChatDuration__c', lambda: randint(1, 600))
    data_gen.add_formula_column('ChatKey__c', formula=fake.md5)
    data_gen.add_formula_column('IpAddress__c', formula=fake.ipv4)
    data_gen.add_formula_column('LiveChatButton.DeveloperName',
                                ['Public_Website_Chat_Button'])
    data_gen.add_formula_column('Location__c', formula=fake.city)
    data_gen.add_formula_column('MaxResponseTimeOperator__c',
                                lambda: randint(1, 120))
    data_gen.add_formula_column('MaxResponseTimeVisitor__c',
                                lambda: randint(1, 240))
    # zero-padded 8-digit transcript name
    data_gen.add_formula_column('Name__c',
                                lambda: str(data_gen.current_row + 1).zfill(8))
    data_gen.add_formula_column('OperatorMessageCount__c',
                                lambda: randint(1, 100))
    data_gen.add_formula_column(
        'Platform__c', ['MacOSX', 'iOS', 'Android', 'Windows', 'Unix'])

    # plausible referrer URLs for the chat session
    referrer = [
        "https://na17.salesforce.com/setup/forcecomHomepage.apexp?setupid=ForceCom&retURL=%2Fui%2Fsupport%2Fservicedesk%2FServiceDeskPage",
        "https://na13.salesforce.com/home/home.jsp",
        "https://sdodemo-main.force.com/partners/servlet/servlet.Integration?lid=01ra0000001VlbA&ic=1",
        "https://sitestudio.na17.force.com/?exitURL=%2F_ui%2Fnetworks%2Fsetup%2FSetupNetworksPage%2Fd",
        "https://mail.google.com/mail/u/0/",
        "https://sdodemo-main.force.com/customers/servlet/servlet.Integration?lid=01ra0000001VlbP&ic=1",
        "https://sdodemo-main.force.com/consumers/servlet/servlet.Integration?lid=01ro0000000EN78&ic=1",
        "https://na17.salesforce.com/servlet/servlet.su?oid=00D300000007EfQ&retURL=%2F0033000000PuxU2&sunetworkuserid=005a000000AuCha&sunetworkid=0DBo0000000Gn4h",
        "https://sdodemo-main.force.com/customers/servlet/servlet.Integration?ic=1&lid=01ra0000001VlbP"
    ]
    data_gen.add_formula_column('ReferrerUri__c', referrer)

    def create_date_formula(column_values):
        # random datetime between the case's create date and close date,
        # clamped to reference_datetime
        case_create_date = dateutil.parser.parse(
            column_values['CreatedDate__c'])
        case_close_date = dateutil.parser.parse(column_values['EndTime__c'])
        create_date = fake.date_time_between_dates(case_create_date,
                                                   case_close_date)
        if create_date > reference_datetime:
            create_date = reference_datetime
        return create_date.isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    def start_time_formula(column_values):
        # chat starts 1-300s after the transcript create date
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        start_time = create_date + timedelta(seconds=randint(1, 300))
        return start_time.isoformat(sep=' ')

    data_gen.add_formula_column('StartTime__c', start_time_formula)

    def end_time_formula(column_values):
        # chat ends 1-600s after it starts (overwrites the renamed column)
        create_date = dateutil.parser.parse(column_values['StartTime__c'])
        end_time = create_date + timedelta(seconds=randint(1, 600))
        return end_time.isoformat(sep=' ')

    data_gen.add_formula_column('EndTime__c', end_time_formula)

    data_gen.add_copy_column('RequestTime__c', 'CreatedDate__c')

    data_gen.add_formula_column(
        'Status__c', lambda: choice(['Missed', 'Completed'], p=[.20, .80]))
    # completed chats are ended by either party; anything else by the visitor
    data_gen.add_map_column('EndedBy__c', 'Status__c', {
        'Completed': ['Visitor', 'Agent'],
        None: 'Visitor'
    })

    data_gen.add_constant_column('SupervisorTranscriptBody__c', '')
    data_gen.add_constant_column('ScreenResolution__c', '')
    data_gen.add_formula_column('UserAgent__c', formula=fake.user_agent)
    data_gen.add_formula_column('VisitorMessageCount__c',
                                lambda: randint(1, 50))
    data_gen.add_formula_column('WaitTime__c', lambda: randint(1, 120))

    def last_referenced_date_formula(column_values):
        # last referenced 1-300s after creation
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        last_referenced_date = create_date + timedelta(seconds=randint(1, 300))
        return last_referenced_date.isoformat(sep=' ')

    data_gen.add_formula_column('LastReferencedDate__c',
                                last_referenced_date_formula)
    data_gen.add_copy_column('LastViewedDate__c', 'LastReferencedDate__c')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # keep only chat-origin cases
    def filter_func(column_values):
        return column_values['Origin'] == 'Chat'

    data_gen.filter(filter_function=filter_func)

    # apply transformations and write file
    data_gen.apply_transformations()
    data_gen.sort_by('StartTime__c')

    output_columns = [
        'External_Id__c', 'Abandoned__c', 'AverageResponseTimeOperator__c',
        'MaxResponseTimeOperator__c', 'OperatorMessageCount__c', 'Body__c',
        'Browser__c', 'BrowserLanguage__c', 'Case.External_Id__c',
        'ChatDuration__c', 'ChatKey__c', 'CreatedDate__c', 'StartTime__c',
        'EndTime__c', 'EndedBy__c', 'LastReferencedDate__c',
        'LastViewedDate__c', 'LiveChatButton.DeveloperName', 'Location__c',
        'Owner.External_Id__c', 'Platform__c', 'ReferrerUri__c',
        'ScreenResolution__c', 'RequestTime__c', 'Status__c',
        'SupervisorTranscriptBody__c', 'UserAgent__c',
        'AverageResponseTimeVisitor__c', 'IpAddress__c',
        'MaxResponseTimeVisitor__c', 'VisitorMessageCount__c', 'WaitTime__c',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def run(batch_id, source_file_name, output_file_name, reference_datetime=today): data_gen = DataGenerator() # load source file account_columns = [ 'External_Id__c', 'Owner.External_Id__c', 'OpportunityCloseDate__c' ] data_gen.load_source_file(source_file_name, account_columns) data_gen.rename_column('External_Id__c', 'Account.External_Id__c') data_gen.rename_column('OpportunityCloseDate__c', 'CreatedDate__c') # generate a random number of cases per account data_gen.duplicate_rows( duplication_factor=lambda: int(lognormal(0) + randint(0, 2))) data_gen.add_formula_column( 'External_Id__c', formula=lambda: 'W_Sales_Case.' + str(data_gen.current_row + 1)) # generate contact def contact_formula(column_values): return column_values['Account.External_Id__c'].replace( 'W_Account', 'W_Contact') data_gen.add_formula_column('Contact.External_Id__c', contact_formula) data_gen.add_formula_column('IsEscalated', case.case_is_escalated) data_gen.add_formula_column('CSAT__c', case.case_csat) data_gen.add_formula_column('Origin', formula=case.case_origin) data_gen.add_formula_column('Type', formula=case.case_type) data_gen.add_formula_column('Subject', formula=case.case_subject) data_gen.add_formula_column('Priority', formula=case.case_priority) data_gen.add_formula_column('Status', formula=case.case_status) def create_date_formula(column_values): oppty_close_date = dateutil.parser.parse( column_values['CreatedDate__c']) create_date = oppty_close_date + timedelta(days=randint(0, 90)) if create_date > reference_datetime: create_date = reference_datetime return create_date.isoformat(sep=' ') data_gen.add_formula_column('CreatedDate__c', create_date_formula) def close_date_formula(column_values): create_date = dateutil.parser.parse(column_values['CreatedDate__c']) if column_values['Status'] == 'Closed': close_date = create_date + timedelta(days=randint(0, 10)) if close_date > reference_datetime: close_date = reference_datetime return close_date.isoformat(sep=' ') else: return '' 
data_gen.add_formula_column('ClosedDate__c', close_date_formula) # add a UUID for each row that is created in this batch data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id) data_gen.apply_transformations() data_gen.write(output_file_name)
def generate(request):
    """Generate ``count`` rows of one fake-data column and return them as a
    flat list wrapped in a Response.

    request.data must contain:
      * 'count' -- number of rows to generate (string or int)
      * 'type'  -- name of a faker provider (e.g. 'first_name', 'city');
                   unrecognized types fall back to 'name'.
    """
    # Faker provider names this endpoint exposes; for each, the output column
    # name equals the provider name (matching the original branch behavior).
    supported = {
        'name', 'name_male', 'name_female',
        'first_name', 'first_name_male', 'first_name_female',
        'last_name', 'last_name_male', 'last_name_female',
        'prefix', 'prefix_male', 'prefix_female',
        'suffix', 'suffix_male', 'suffix_female',
        'phone_number', 'ssn',
        'address', 'building_number', 'street_name', 'street_address',
        'street_suffix', 'secondary_address',
        'city', 'state', 'state_abbr', 'zipcode', 'zipcode_plus4',
        'country', 'country_code',
        'company', 'job',
        'ipv4', 'ipv6', 'url',
        'free_email', 'safe_email', 'company_email',
        'browser', 'md5', 'user_agent', 'sentence',
    }

    data_gen = DataGenerator()
    count = int(request.data['count'])
    requested = request.data['type']
    data_gen.row_count = count

    # BUG FIX: the original if/elif chain mapped 'last_name_female' to the
    # first_name_female generator (copy-paste error). Dispatching by attribute
    # name fixes that and collapses ~40 duplicated branches.
    if requested in supported:
        data_gen.add_formula_column(requested, formula=getattr(fake, requested))
    else:
        # unknown type: fall back to full names, as before
        data_gen.add_formula_column('name', formula=fake.name)

    data_gen.apply_transformations()
    # flatten the generated rows into a single list of values
    flat = [val for sublist in data_gen.rows for val in sublist]
    return Response(flat)
def run(batch_id, source_file_name, output_file_name): data_gen = DataGenerator() # load source file source_columns = ['AccountExternalId__c', 'AccountName__c'] data_gen.load_source_file(source_file_name, source_columns) # rename columns data_gen.rename_column('AccountExternalId__c', 'External_Id__c') data_gen.rename_column('AccountName__c', 'Name') # filter out duplicate data data_gen.unique() # load shape data as dataset shape_columns = [ 'AccountExternalId__c', 'AccountAnnualRevenue__c', 'AccountNumberOfEmployees__c', 'AccountBookings__c', 'Region__c', 'Owner.External_Id__c', 'CloseDate', 'CreatedDate__c'] shape_dataset = data_gen.load_dataset('shape', source_file_name, shape_columns) # build map of account values shape_account_map = shape_dataset.group_by('AccountExternalId__c') # helper method to get shape data related to an account def get_shape_data(column_values, shape_column_name): return shape_account_map.get(column_values['External_Id__c'])[0].get(shape_column_name) # generate owner def owner_formula(column_values): return get_shape_data(column_values, 'Owner.External_Id__c') data_gen.add_formula_column('Owner.External_Id__c', owner_formula) # update number employees based on shape data def employees_formula(column_values): employees = get_shape_data(column_values, 'AccountNumberOfEmployees__c') return randint(*account.client_size_employees_bands[employees]) data_gen.add_formula_column('NumberOfEmployees', employees_formula) # update annual revenue based on shape data def revenue_formula(column_values): revenue = get_shape_data(column_values, 'AccountAnnualRevenue__c') return 1000 * randint(*account.client_size_rev_bands[revenue]) data_gen.add_formula_column('AnnualRevenue', revenue_formula) # generate account source data_gen.add_formula_column('AccountSource', formula=account.account_source) # update type based on shape data def type_formula(column_values): return get_shape_data(column_values, 'AccountAnnualRevenue__c') 
data_gen.add_formula_column('Type', type_formula) # generate industry data_gen.add_formula_column('Industry', formula=account.account_industry) # generate billing street data_gen.add_formula_column('BillingStreet', formula=lambda: fake.building_number() + ' ' + fake.street_name()) # generate billing city data_gen.add_formula_column('BillingCity', formula=fake.city) # update billing state based on shape data def state_formula(column_values): region = get_shape_data(column_values, 'Region__c') return choice(account.region_state_map[region]) data_gen.add_formula_column('BillingState', state_formula) # generate billing country data_gen.add_constant_column('BillingCountry', 'USA') # generate year started data_gen.add_formula_column('YearStarted', formula=account.account_year_started) # generate ownership data_gen.add_formula_column('Ownership', formula=account.account_ownership) # generate rating data_gen.add_formula_column('Rating', formula=account.account_rating) # generate earliest created date def create_date_formula(column_values): opptys = shape_account_map.get(column_values['External_Id__c']) create_dates = [dateutil.parser.parse(oppty['CreatedDate__c']) for oppty in opptys] create_dates.sort() return create_dates[0] data_gen.add_formula_column('CreatedDate__c', create_date_formula) # generate earliest close date def close_date_formula(column_values): opptys = shape_account_map.get(column_values['External_Id__c']) close_dates = [dateutil.parser.parse(oppty['CloseDate']).date() for oppty in opptys] close_dates.sort() return close_dates[0] data_gen.add_formula_column('OpportunityCloseDate__c', close_date_formula) # add a UUID for each row that is created in this batch data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id) # apply transformations and write file data_gen.apply_transformations() data_gen.write(output_file_name)
def run(batch_id, source_file_name, output_file_name,
        reference_date=today_datetime, filter_function=None):
    """Post-process opportunity rows around ``reference_date``.

    Rows closing on/before reference_date are kept unchanged; rows created
    before but closing after reference_date are rewritten as still-open
    (blank IsClosed/IsWon, interpolated stage/probability/forecast).
    Rows created after reference_date are dropped (neither branch keeps
    them). Output row order is reversed back via data_gen.reverse() because
    the loop pops rows from the end.
    """
    def get_close_date(values):
        return dateutil.parser.parse(values['CloseDate'])

    def get_create_date(values):
        return dateutil.parser.parse(values['CreatedDate__c'])

    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # add an age column (initially equal to TimeToClose__c; adjusted below
    # for still-open opportunities)
    data_gen.add_copy_column('Age__c', 'TimeToClose__c')

    # generate a close date
    def close_date_formula(column_values):
        last_day = date(date.today().year, 12, 31)
        offset = column_values['close_date_offset__c']
        # last day of current year - offset
        close_date = last_day - timedelta(days=int(offset))
        return str(close_date)

    data_gen.add_formula_column('CloseDate', close_date_formula)

    # generate a create date (close date minus time-to-close)
    def create_date_formula(column_values):
        close_date = dateutil.parser.parse(column_values['CloseDate'])
        offset = column_values['TimeToClose__c']
        create_date = close_date - timedelta(days=int(offset))
        return create_date.isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    # generate last activity date between creation and close, clamped to
    # reference_date
    def last_activity_date_formula(column_values):
        create_date = get_create_date(column_values)
        close_date = get_close_date(column_values)
        if close_date > reference_date:
            close_date = reference_date
        if create_date > reference_date:
            create_date = reference_date
        return fake.date_time_between_dates(create_date, close_date).date()

    data_gen.add_formula_column('LastActivityDate__c',
                                formula=last_activity_date_formula)

    data_gen.apply_transformations()

    if filter_function:
        data_gen.filter(filter_function)

    new_rows = []
    row_count = len(data_gen.rows)
    for i in range(row_count):
        # consume rows from the end; reverse() below restores the order
        row = data_gen.rows.pop()
        column_values = data_gen.row_to_column_values(row)
        close_day = get_close_date(column_values)
        create_day = get_create_date(column_values)
        # if close date is before reference date keep it exactly as is
        if close_day <= reference_date:
            new_rows.append(row)
        # if create date is before reference date, but the close date is
        # after reference date, the opportunity is still open at the
        # reference date: rewrite it as in-flight
        elif (create_day <= reference_date) and (close_day > reference_date):
            # set age to days elapsed since creation
            age = (reference_date - create_day).days
            column_values['Age__c'] = age
            ttc = float(column_values['TimeToClose__c'])
            # fraction of the sales cycle elapsed
            pct = age / ttc
            # set IsClosed to blank
            column_values['IsClosed'] = ''
            # set IsWon to blank
            column_values['IsWon'] = ''
            # set a stage name proportional to elapsed time, with jitter
            stage_name_index = int(
                floor(pct * 4) + choice([-1, 0, 1], p=[.2, .7, .1]))
            # adjust the stage name index into the valid 0..3 range
            if stage_name_index < 0:
                stage_name_index = 0
            if stage_name_index > 3:
                stage_name_index = 3
            column_values['StageName'] = definitions.stage_name[stage_name_index]
            column_values['Probability'] = definitions.probabilities[stage_name_index]
            column_values['ForecastCategory'] = definitions.forecast_category[
                choice([1, 2, 4], p=[.625, .25, .125])]
            column_values['ForecastCategoryName'] = definitions.forecast_category_name[
                column_values['ForecastCategory']]
            # scale stage count by elapsed fraction of the cycle
            column_values['SalesStageCount__c'] = ceil(
                pct * float(column_values['SalesStageCount__c']))
            new_rows.append(data_gen.column_values_to_row(column_values))

    data_gen.rows = new_rows
    data_gen.reverse()
    data_gen.write(output_file_name)
def run(batch_id, source_file_name, output_file_name, reference_date=today_datetime):
    """Build case rows with generated closed/created/last-activity dates, then
    rewrite cases whose close date is after `reference_date` to look open
    (random in-flight Status), and derive MilestoneStatus__c and ids.

    batch_id         -- tag written to every row as analyticsdemo_batch_id__c
    source_file_name -- input CSV consumed by DataGenerator
    output_file_name -- destination file
    reference_date   -- "as of" datetime splitting closed vs. open cases
    """
    # helpers to parse the date columns of a row's column-value dict
    def get_close_date(values):
        return dateutil.parser.parse(values['ClosedDate__c'])

    def get_create_date(values):
        return dateutil.parser.parse(values['CreatedDate__c'])

    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    # calculate dates
    def close_date_formula(column_values):
        last_day = date(date.today().year, 12, 31)
        last_day = datetime.combine(last_day, datetime.min.time())
        offset = column_values['close_date_offset']
        # last day of current year - offset
        close_date = last_day - timedelta(days=int(offset))
        # add a business-hours-weighted time of day (9:00-17:59)
        close_date = close_date + timedelta(hours=int(choice([9, 10, 11, 12, 13, 14, 15, 16, 17], p=[.12, .13, .13, .07, .09, .13, .13, .11, .09])), minutes=randint(0, 60), seconds=randint(0, 60))
        return close_date.isoformat(sep=' ')

    data_gen.add_formula_column('ClosedDate__c', close_date_formula)

    # created = closed minus Time_Open__c days
    def created_date_formula(column_values):
        time_open = int(column_values['Time_Open__c'])
        date_closed = dateutil.parser.parse(column_values['ClosedDate__c'])
        return (date_closed - timedelta(days=time_open)).isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', created_date_formula)

    # generate last activity date between create and close, clamped to today
    def last_activity_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        close_date = dateutil.parser.parse(column_values['ClosedDate__c'])
        if close_date > today_datetime:
            close_date = today_datetime
        if create_date > today_datetime:
            create_date = today_datetime
        return fake.date_time_between_dates(create_date, close_date).date()

    data_gen.add_formula_column('LastActivityDate__c', formula=last_activity_date_formula)

    data_gen.apply_transformations()

    # second pass: pop() walks rows back-to-front, so new_rows is reversed;
    # data_gen.reverse() below restores the original order
    new_rows = []
    row_count = len(data_gen.rows)
    for i in range(row_count):
        row = data_gen.rows.pop()
        column_values = data_gen.row_to_column_values(row)
        close_day = get_close_date(column_values)
        create_day = get_create_date(column_values)
        # if close date is before reference date keep it exactly as is
        if close_day <= reference_date:
            new_rows.append(row)
        # if create date is before reference date, but the close date is after reference date
        elif (create_day <= reference_date) and (close_day > reference_date):
            # case is still open as of reference_date: pick an in-flight status
            column_values['Status'] = choice([
                'New', 'Working', 'Waiting on Customer', 'Response Received',
                'Escalated', 'Warning', 'Attention', 'On Hold',
                'Closed in Community'], p=[
                0.20, 0.30, 0.10, 0.05, 0.10, 0.05, 0.05, 0.10, 0.05
            ])
            new_rows.append(data_gen.column_values_to_row(column_values))
        # NOTE(review): rows created after reference_date fall through both
        # branches and are dropped — presumably intentional; confirm.
    data_gen.rows = new_rows
    data_gen.reverse()

    # "<Open|Closed> - <SLA>" milestone label
    def milestone_status_formula(column_values):
        status = column_values['Status']
        if status != 'Closed':
            status = 'Open'
        sla = column_values['SLA']
        return status + ' - ' + sla

    data_gen.add_formula_column('MilestoneStatus__c', formula=milestone_status_formula)

    data_gen.add_formula_column('External_Id__c', formula=lambda: 'W_Case.' + str(data_gen.current_row + 1))

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()
    data_gen.write(output_file_name)
def run(batch_id, source_file_name, output_file_name, products_file_name, pricebook_file_name):
    """Generate OpportunityLineItem rows by joining opportunities with the
    product and pricebook datasets.

    batch_id            -- tag written to every row as analyticsdemo_batch_id__c
    source_file_name    -- opportunity CSV (External_Id__c, Product2Name__c, Amount)
    output_file_name    -- destination file
    products_file_name  -- Product2 dataset (keyed by Name)
    pricebook_file_name -- PricebookEntry dataset (keyed by Product2.External_Id__c)

    Fixes vs. previous version: quantity formula parses amounts with float()
    (decimal strings no longer raise ValueError) and guards a zero/negative
    list price (no more ZeroDivisionError); local name typo 'quanity' fixed.
    """
    data_gen = DataGenerator()

    # load source file
    source_columns = ['External_Id__c', 'Product2Name__c', 'Amount']
    data_gen.load_source_file(source_file_name, source_columns)

    # load datasets
    products = data_gen.load_dataset('products', products_file_name)
    products_by_name = products.group_by('Name')
    pricebook = data_gen.load_dataset('pricebook', pricebook_file_name)
    pricebook_by_product = pricebook.group_by('Product2.External_Id__c')

    # rename columns
    data_gen.rename_column('External_Id__c', 'Opportunity.External_Id__c')
    data_gen.rename_column('Amount', 'TotalPrice')

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_OpportunityLineItem.' + str(data_gen.current_row + 1))

    # transform product name to code
    data_gen.add_formula_column(
        'ProductCode',
        lambda cv: products_by_name[cv['Product2Name__c']][0]['ProductCode'])

    # generate product reference id
    data_gen.add_formula_column(
        'Product2.External_Id__c',
        lambda cv: products_by_name[cv['Product2Name__c']][0]['External_Id__c'])

    # generate list price
    data_gen.add_formula_column(
        'ListPrice',
        lambda cv: pricebook_by_product[cv['ProductCode']][0]['UnitPrice'])

    # generate pricebook reference id
    data_gen.add_formula_column(
        'PricebookEntry.External_Id__c',
        lambda cv: pricebook_by_product[cv['ProductCode']][0]['External_Id__c'])

    # generate quantity = ceil(total / list), with a floor of 1
    def quantity_formula(column_values):
        # float() accepts both integer and decimal strings
        total_price = float(column_values['TotalPrice'])
        list_price = float(column_values['ListPrice'])
        # guard: a zero (or negative) list price would otherwise divide by zero
        if list_price <= 0:
            return 1
        quantity = total_price / list_price
        if quantity <= 0:
            quantity = 1
        return ceil(quantity)

    data_gen.add_formula_column('Quantity', quantity_formula)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c',
        'Opportunity.External_Id__c',
        'TotalPrice',
        'PricebookEntry.External_Id__c',
        'Quantity',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def run(batch_id, source_file_name, output_file_name, config, reference_date=today_datetime, filter_function=None):
    """Config-driven transformer: generates an external id, remaps foreign-key
    columns via auxiliary datasets described in config['mappings'], then writes
    the output and (for Contract/Order/SalesAgreement) a companion status file.

    batch_id         -- tag written to every row as analyticsdemo_batch_id__c
    source_file_name -- input CSV consumed by DataGenerator
    output_file_name -- destination file (its name selects the status file)
    config           -- dict with externalIdColumnName, externalIdFormat and
                        a 'mappings' list of foreign-file join descriptors
    reference_date   -- used only to derive the temporal status-file folder
    filter_function  -- optional predicate applied before writing
    """
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    # generate external id
    col_name = config['externalIdColumnName']
    data_gen.add_formula_column(col_name, formula=lambda: config['externalIdFormat'] + str(data_gen.current_row + 1))

    # iterate through the columns to be mapped
    # load current foreign file
    # if replaceSourceColumn is true, replace the 'sourceColumn' by 'replacementColumnName'
    # retrieve 'foreignRetrieveColumn' where 'foreignMappingColumn' == 'sourceColumn'
    for mapCol in config['mappings']:
        # '.source.' files live under the static source path; everything else
        # under today's temporal path
        if '.source.' in mapCol['foreignFile']:
            foreign_file = definitions.mfg_source_path + mapCol['foreignFile']
        else:
            foreign_file = definitions.mfg_temporal_path.format(today.isoformat()) + mapCol['foreignFile']
        foreignRetrieveColumn = mapCol['foreignRetrieveColumn']
        sourceColumn = mapCol['sourceColumn']
        aux_dataset = data_gen.load_dataset('aux', foreign_file)
        aux_by_id = aux_dataset.group_by('Id')

        # look up the foreign row by the value in sourceColumn; empty stays empty.
        # NOTE(review): closure reads sourceColumn/foreignRetrieveColumn/aux_by_id
        # from the loop scope — safe only because apply_transformations() runs
        # inside the loop, before these names are rebound.
        def get_aux_data(column_values):
            if column_values[sourceColumn] == '':
                aux_data = ''
            else:
                aux_data = aux_by_id.get(column_values[sourceColumn])[0].get(foreignRetrieveColumn)
            return aux_data

        data_gen.add_formula_column(sourceColumn, formula=get_aux_data)
        data_gen.apply_transformations()
        if mapCol['replaceSourceColumn']:
            data_gen.rename_column(sourceColumn, mapCol['replacementColumnName'])
        # always empty the auxiliary lists
        aux_dataset = []
        aux_by_id = []
        # remove auxiliary dataset to free up memory
        if 'aux' in data_gen.datasets:
            data_gen.remove_dataset('aux')

    if 'Status' in data_gen.column_names:
        data_gen.add_constant_column('Status', 'Draft')

    # generate LastProcessedDate
    data_gen.add_constant_column('LastProcessedDate', today.isoformat())

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    if filter_function:
        data_gen.filter(filter_function)

    data_gen.write(output_file_name)

    # Now the creation of the status file begins
    tmp_folder = reference_date.strftime("%Y-%m-%d")
    if 'Contract.csv' in output_file_name:
        generate_status_file(source_file=output_file_name,
                             original_status_file=definitions.source_contract,
                             tmp_folder=tmp_folder,
                             file_name='Contract.status.ALL.csv')
    elif 'Order.csv' in output_file_name:
        generate_status_file(source_file=output_file_name,
                             original_status_file=definitions.source_order,
                             tmp_folder=tmp_folder,
                             file_name='Order.status.ALL.csv')
    elif 'SalesAgreement.csv' in output_file_name:
        generate_status_file(
            source_file=output_file_name,
            original_status_file=definitions.source_sales_agreement,
            tmp_folder=tmp_folder,
            file_name='SalesAgreement.status.ALL.csv')
def run(batch_id, source_file_name, output_file_name, accounts_file_name, contacts_file_name):
    """Generate Lead rows from opportunities: roughly 25% of opportunities get
    a converted lead (pulling names/company/industry/state from the matching
    account and contact), the rest become unconverted leads with fake data.

    batch_id           -- tag written as analyticsdemo_batch_id__c
    source_file_name   -- opportunity CSV
    output_file_name   -- destination file
    accounts_file_name -- account dataset for converted-lead fields
    contacts_file_name -- contact dataset for converted-lead names
    """
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c',
        'AccountExternalId__c',
        'Owner.External_Id__c',
        'LeadSource',
        'CloseDate',
        'CreatedDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    # load accounts as dataset
    account_columns = [
        'External_Id__c',
        'Name',
        'BillingState',
        'Industry'
    ]
    account_dataset = data_gen.load_dataset('accounts', accounts_file_name, account_columns)
    accounts_by_id = account_dataset.group_by('External_Id__c')

    # load contacts as dataset
    contact_columns = [
        'External_Id__c',
        'FirstName',
        'LastName'
    ]
    contact_dataset = data_gen.load_dataset('contacts', contacts_file_name, contact_columns)
    contacts_by_id = contact_dataset.group_by('External_Id__c')

    # helper method to get account data
    def get_account_data(column_values, account_column_name):
        return accounts_by_id.get(column_values['ConvertedAccount.External_Id__c'])[0].get(account_column_name)

    # helper method to get contact data
    def get_contact_data(column_values, contact_column_name):
        return contacts_by_id.get(column_values['ConvertedContact.External_Id__c'])[0].get(contact_column_name)

    # rename columns
    data_gen.rename_column('External_Id__c', 'ConvertedOpportunity.External_Id__c')
    data_gen.rename_column('AccountExternalId__c', 'ConvertedAccount.External_Id__c')
    data_gen.rename_column('CloseDate', 'ConvertedDate__c')

    # generate converted lead at a random ratio (~25% of opportunities)
    data_gen.duplicate_rows(duplication_factor=lambda: choice([0, 1], p=[.75, .25]))

    # generate id
    data_gen.add_formula_column('External_Id__c', formula=lambda: 'W_Lead.' + str(data_gen.current_row + 1))

    # generate create date: lead precedes the opportunity by 0-45 days
    def create_date_formula(column_values):
        oppty_create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        return oppty_create_date - timedelta(days=randint(0, 45))

    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    # generate status
    data_gen.add_formula_column('Status', formula=lead.lead_status)

    # generate converted flag from status (None key = all other statuses)
    data_gen.add_map_column('IsConverted', 'Status', {
        'Qualified - Convert': 'true',
        None: 'false'
    })

    # generate opportunity: keep the reference only for converted leads
    data_gen.add_map_column('ConvertedOpportunity.External_Id__c', 'Status', {
        'Qualified - Convert': lambda cv: cv['ConvertedOpportunity.External_Id__c'],
        None: ''
    })

    # generate account: keep the reference only for converted leads
    data_gen.add_map_column('ConvertedAccount.External_Id__c', 'Status', {
        'Qualified - Convert': lambda cv: cv['ConvertedAccount.External_Id__c'],
        None: ''
    })

    # generate contact: contact id mirrors the account id
    # (W_Account.N -> W_Contact.N)
    data_gen.add_map_column('ConvertedContact.External_Id__c', 'Status', {
        'Qualified - Convert': lambda cv: cv['ConvertedAccount.External_Id__c'].replace('W_Account', 'W_Contact'),
        None: ''
    })

    # generate converted date
    data_gen.add_map_column('ConvertedDate__c', 'Status', {
        'Qualified - Convert': lambda cv: cv['ConvertedDate__c'],
        None: ''
    })

    # generate name: real contact name when converted, fake otherwise
    data_gen.add_map_column('FirstName', 'Status', {
        'Qualified - Convert': lambda cv: get_contact_data(cv, 'FirstName'),
        None: lambda: fake.first_name()
    })
    data_gen.add_map_column('LastName', 'Status', {
        'Qualified - Convert': lambda cv: get_contact_data(cv, 'LastName'),
        None: lambda: fake.last_name()
    })

    # generate company
    data_gen.add_map_column('Company', 'Status', {
        'Qualified - Convert': lambda cv: get_account_data(cv, 'Name'),
        None: 'Not Applicable'
    })

    # generate industry
    data_gen.add_map_column('Industry', 'Status', {
        'Qualified - Convert': lambda cv: get_account_data(cv, 'Industry'),
        None: ''
    })

    # generate state
    data_gen.add_map_column('State', 'Status', {
        'Qualified - Convert': lambda cv: get_account_data(cv, 'BillingState'),
        None: ''
    })

    # generate is unread by owner
    data_gen.add_map_column('IsUnreadByOwner', 'Status', {
        'Qualified - Convert': 'false',
        None: lead.lead_is_unread_by_owner
    })

    # generate rating
    data_gen.add_formula_column('Rating', formula=lead.lead_rating)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()
    data_gen.write(output_file_name)
def run(batch_id, source_file_name, output_file_name, reference_date=today_datetime):
    """Generate UserServicePresence rows: roughly 60 presence records per user
    with random capacities, durations and presence statuses over the year
    preceding `reference_date`.

    batch_id         -- tag written as analyticsdemo_batch_id__c
    source_file_name -- user CSV (only External_Id__c is read)
    output_file_name -- destination file
    reference_date   -- upper bound of the generated CreatedDate__c window
    """
    data_gen = DataGenerator()

    # source users become the presence owners
    data_gen.load_source_file(source_file_name, ['External_Id__c'])
    data_gen.rename_column('External_Id__c', 'User.External_Id__c')
    data_gen.add_copy_column('Owner.External_Id__c', 'User.External_Id__c')

    # ~60 presence rows per user
    data_gen.duplicate_rows(duplication_factor=lambda: int(normal(60, 10)))

    data_gen.add_formula_column(
        'External_Id__c',
        lambda: 'W_UserServicePresence.' + str(data_gen.current_row + 1))

    # random capacity/duration metrics (order of these calls fixes the
    # random-draw sequence, so it is preserved exactly)
    data_gen.add_formula_column('AtCapacityDuration__c', lambda: randint(30, 900))
    data_gen.add_formula_column('AverageCapacity__c', lambda: randint(30, 500))
    data_gen.add_formula_column('ConfiguredCapacity__c', lambda: randint(30, 600))

    # creation timestamps fall in the 365 days leading up to reference_date
    window_start = reference_date - timedelta(days=365)
    window_end = reference_date
    data_gen.add_formula_column(
        'CreatedDate__c',
        lambda: fake.date_time_between_dates(window_start, window_end).isoformat(sep=' '))

    data_gen.add_formula_column('IdleDuration__c', lambda: randint(30, 600))
    data_gen.add_formula_column('IsCurrentState__c', lambda: choice(['true', 'false']))
    data_gen.add_formula_column('IsAway__c', lambda: choice(['true', 'false']))
    data_gen.add_formula_column('StatusDuration__c', lambda: randint(30, 900))

    # a status interval starts at creation and runs for StatusDuration__c seconds
    data_gen.add_copy_column('StatusStartDate__c', 'CreatedDate__c')

    def status_end_date_formula(column_values):
        begun_at = dateutil.parser.parse(column_values['StatusStartDate__c'])
        seconds_in_status = int(column_values['StatusDuration__c'])
        ended_at = begun_at + timedelta(seconds=seconds_in_status)
        return ended_at.isoformat(sep=' ')

    data_gen.add_formula_column('StatusEndDate__c', formula=status_end_date_formula)

    presence_statuses = [
        'Busy',
        'Online',
        'Available_Live_Agent',
        'Busy_Break',
        'Busy_Lunch',
        'Busy_Training',
        'Available_LiveMessage'
    ]
    data_gen.add_formula_column('ServicePresenceStatus.DeveloperName', presence_statuses)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c',
        'User.External_Id__c',
        'Owner.External_Id__c',
        'AtCapacityDuration__c',
        'AverageCapacity__c',
        'ConfiguredCapacity__c',
        'CreatedDate__c',
        'IdleDuration__c',
        'IsAway__c',
        'IsCurrentState__c',
        'StatusDuration__c',
        'StatusStartDate__c',
        'StatusEndDate__c',
        'ServicePresenceStatus.DeveloperName',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def run(batch_id, source_file_name, output_file_name, source_accounts, source_service_resources, source_service_territories, source_work_orders, reference_datetime=today):
    """Shift service-appointment datetimes so the latest EarliestStartTime
    lands just before `reference_datetime`, remap internal Ids to external ids
    from the four lookup files, keep only rows tied to 'WO.' work orders, and
    write the result.

    Returns the date delta applied to every datetime column.

    Improvement: the five copy-pasted per-column shift lambdas and the four
    identical dataset-mapping stanzas are folded into two helpers; behavior
    is unchanged.
    """
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    data_gen.add_formula_column(
        'EarliestStartTime',
        lambda cv: dateutil.parser.parse(cv['EarliestStartTime']))
    data_gen.apply_transformations()
    data_gen.sort_by('EarliestStartTime', reverse=True)

    # shift dates to be 2 weeks prior to the reference date: delta is the gap
    # between the reference date and the latest EarliestStartTime in the data
    delta = reference_datetime.date() - data_gen.row_to_column_values(
        data_gen.rows[0])['EarliestStartTime'].date()
    shift = timedelta(days=delta.days - 1)

    # EarliestStartTime is already parsed, so it shifts directly
    data_gen.add_formula_column(
        'EarliestStartTime',
        lambda cv: (cv['EarliestStartTime'] + shift).replace(tzinfo=None))

    def _make_shift_formula(col):
        # bind col as a default arg to avoid the late-binding closure pitfall
        return lambda cv, _col=col: "" if cv[_col] == "" else (
            dateutil.parser.parse(cv[_col]) + shift).replace(tzinfo=None)

    # all remaining datetime columns shift identically, preserving blanks
    for col in ('ActualStartTime', 'ActualEndTime', 'ArrivalWindowStartTime',
                'ArrivalWindowEndTime', 'DueDate'):
        data_gen.add_formula_column(col, _make_shift_formula(col))

    data_gen.apply_transformations()
    data_gen.add_copy_column('CreatedDate__c', 'EarliestStartTime')

    def _map_external_id(dataset_name, file_name, target_col, source_col):
        # load an Id -> External_ID__c lookup and map it onto a new column
        id_map = data_gen.load_dataset(dataset_name, file_name,
                                       ['Id', 'External_ID__c']).dict(
                                           'Id', 'External_ID__c')
        data_gen.add_map_column(target_col, source_col, id_map)

    _map_external_id("Accounts", source_accounts,
                     'Account.External_Id__c', 'AccountId')
    _map_external_id("ServiceResources", source_service_resources,
                     'ServiceResource.External_Id__c',
                     'FSLDemoTools_Service_Resource__c')
    _map_external_id("ServiceTerritories", source_service_territories,
                     'ServiceTerritory.External_Id__c', 'ServiceTerritoryId')
    _map_external_id("WorkOrders", source_work_orders,
                     'WorkOrder.External_Id__c', 'ParentRecordId')

    data_gen.apply_transformations()

    # keep only appointments whose parent is a work order
    data_gen.filter(
        lambda cv: cv['WorkOrder.External_Id__c'].startswith('WO.'))
    data_gen.apply_transformations()

    data_gen.write(
        output_file_name,
        columns=[
            'External_ID__c', 'CreatedDate__c', 'ServiceResource.External_Id__c',
            'ServiceTerritory.External_Id__c', 'WorkOrder.External_Id__c',
            'ActualStartTime', 'ArrivalWindowStartTime', 'ActualDuration',
            'EarliestStartTime', 'Duration', 'DurationType', 'Status', 'DueDate',
            'ActualEndTime', 'ArrivalWindowEndTime'
        ])
    return delta
def run(batch_id, source_file_name, output_file_name, reference_datetime=today):
    """Generate 0-3 service Task rows per case with random call metadata and
    dates between the case's creation and its last activity.

    batch_id           -- tag written as analyticsdemo_batch_id__c
    source_file_name   -- case CSV
    output_file_name   -- destination file
    reference_datetime -- upper clamp for generated task creation timestamps
    """
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c',
        'Owner.External_Id__c',
        'CreatedDate__c',
        'LastActivityDate__c',
        'Team__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    data_gen.rename_column('LastActivityDate__c', 'ActivityDate')
    data_gen.rename_column('Team__c', 'CallObject')

    # generate a random number of tasks per case
    data_gen.duplicate_rows(duplication_factor=lambda: randint(0, 3))

    data_gen.add_formula_column('External_Id__c', formula=lambda: 'W_Services_Task.' + str(data_gen.current_row + 1))
    data_gen.add_formula_column('TaskSubtype', formula=task.task_subtype)
    data_gen.add_formula_column('CallDurationInSeconds', formula=task.task_call_duration)
    data_gen.add_formula_column('CallDisposition', formula=task.task_call_disposition)
    data_gen.add_formula_column('CallType', formula=task.task_call_type)
    data_gen.add_formula_column('Status', formula=task.task_status)
    data_gen.add_formula_column('Priority', formula=task.task_priority)

    # task created between case creation and the case's activity date
    # (given the case creation's time of day), clamped to reference_datetime
    def create_date_formula(column_values):
        case_create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        case_close_date = datetime.combine(dateutil.parser.parse(column_values['ActivityDate']), case_create_date.time())
        create_date = fake.date_time_between_dates(case_create_date, case_close_date)
        if create_date > reference_datetime:
            create_date = reference_datetime
        return create_date.isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    data_gen.add_copy_column('LastModifiedDate__c', 'CreatedDate__c')

    # task due/activity date: 0-14 days after the task's own creation
    # (overwrites the ActivityDate copied from the case)
    def activity_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c']).date()
        return (create_date + timedelta(days=randint(0, 14))).isoformat()

    data_gen.add_formula_column('ActivityDate', activity_date_formula)

    data_gen.add_formula_column('Subject', formula=task.task_subject_simple)

    # derive Type from Subject (None key = all other subjects)
    data_gen.add_map_column('Type', 'Subject', value_map={
        'Call': lambda: choice(['Call', 'Meeting'], p=[.70, .30]),
        'Send Letter': 'Email',
        'Send Quote': 'Email',
        None: lambda: choice(['Meeting', 'Prep', 'Other'], p=[.50, .25, .25])
    })

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c',
        'Owner.External_Id__c',
        'Case.External_Id__c',
        'CreatedDate__c',
        'LastModifiedDate__c',
        'ActivityDate',
        'Subject',
        'Type',
        'TaskSubtype',
        'CallDurationInSeconds',
        'CallDisposition',
        'CallType',
        'CallObject',
        'Status',
        'Priority',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def run(source_file_name, output_file_name):
    """Transform the raw opportunity-shape CSV into the W_Opportunity dataset:
    renames raw columns, maps categorical values via `definitions`, generates
    ids, dates, stage/forecast fields, region-based owners, and a synthetic
    distribution of accounts (several opportunities share one account).

    source_file_name -- raw shape CSV (human-readable column headers)
    output_file_name -- destination file
    """
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    # raw header -> Salesforce-style field name
    rename_map = {
        'Supplies Group': 'Product2Family__c',
        'Region': 'Region__c',
        'Route To Market': 'LeadSource',
        'Elapsed Days In Sales Stage': 'TimeToClose__c',
        'Sales Stage Change Count': 'SalesStageCount__c',
        'Opportunity Amount USD': 'Amount',
        'Deal Size Category': 'DealSizeCategory__c'
    }
    data_gen.rename_columns(rename_map)

    # multiply time to close by 2
    data_gen.add_formula_column('TimeToClose__c', lambda cv: int(cv['TimeToClose__c']) * 2)

    # map existing columns to new columns
    data_gen.add_map_column('Competitor__c', 'Competitor Type', definitions.competitor_type)
    data_gen.add_map_column('Product2Name__c', 'Supplies Subgroup', definitions.supplies_subgroup_map)
    data_gen.add_map_column('AccountAnnualRevenue__c', 'Client Size By Revenue', definitions.client_size_rev)
    data_gen.add_map_column('AccountNumberOfEmployees__c', 'Client Size By Employee Count', definitions.client_size_employees)
    data_gen.add_map_column('AccountBookings__c', 'Revenue From Client Past Two Years', definitions.client_past_revenue)
    data_gen.add_map_column('IsWon', 'Opportunity Result', definitions.isWon)

    # generate external id
    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Opportunity.' + str(data_gen.current_row + 1))

    data_gen.add_formula_column(
        'Exec_Meeting__c', lambda: choice(['true', 'false'], p=[.35, .65]))
    data_gen.add_formula_column(
        'Interactive_Demo__c', lambda: choice(['true', 'false'], p=[.30, .70]))

    # adjust TimeToClose__c based on exec meeting / demo / competitor / revenue.
    # NOTE(review): 'TimeToClose__c' already has the *2 formula above — this
    # second add_formula_column with the same name relies on DataGenerator's
    # (unseen here) semantics for duplicate formula columns; confirm which wins.
    def ttc_formula(column_values):
        ttc = int(column_values['TimeToClose__c'])
        exec_meeting = column_values['Exec_Meeting__c']
        competitor_type = column_values['Competitor Type']
        demo = column_values['Interactive_Demo__c']
        rev = column_values['AccountAnnualRevenue__c']
        if ttc == 0:
            return 0
        if exec_meeting == 'true':
            if competitor_type == 'None':
                ttc = ttc + 4
            else:
                ttc = ttc - 6
        if demo == 'true':
            if rev == 'T100':
                ttc = ttc + 6
            else:
                ttc = ttc - 5
        # clamp at zero so the adjustments never go negative
        if ttc < 0:
            return 0
        return ttc

    data_gen.add_formula_column('TimeToClose__c', formula=ttc_formula)

    data_gen.add_constant_column('IsClosed', 'true')

    data_gen.add_formula_column(
        'RecordType.DeveloperName',
        formula=lambda: choice(['SimpleOpportunity', 'ChannelPartner'], p=[.70, .30]))

    # generate opportunity type
    types = [
        'Add-On Business', 'Existing Business', 'New Business',
        'New Business / Add-on'
    ]
    data_gen.add_formula_column(
        'Type', formula=lambda: choice(types, p=[0.1, 0.3, 0.5, 0.1]))

    # generate a close date year and quarter
    data_gen.add_formula_column('close_date_year',
                                formula=lambda: choice(list(range(0, 30))))
    data_gen.add_formula_column(
        'close_date_quarter',
        formula=lambda: choice([1, 2, 3, 4], p=[0.21, 0.24, 0.22, 0.33]))

    # generate a close date offset from the year and quarter
    def offset_formula(column_values):
        # chi-square day jitter inside the quarter
        day = int(round(chisquare(9) * 5))
        offset = 365 * (column_values['close_date_year']) + 91 * (
            column_values['close_date_quarter'] - 1) + day
        return offset

    data_gen.add_formula_column('close_date_offset__c', offset_formula)

    # generate a close date
    def close_date_formula(column_values):
        last_day = date(date.today().year, 12, 31)
        offset = column_values['close_date_offset__c']
        # last day of current year - offset
        close_date = last_day - timedelta(days=int(offset))
        return str(close_date)

    data_gen.add_formula_column('CloseDate', close_date_formula)

    # generate a create date (close date minus TimeToClose__c days)
    def create_date_formula(column_values):
        close_date = dateutil.parser.parse(column_values['CloseDate'])
        offset = column_values['TimeToClose__c']
        create_date = close_date - timedelta(days=int(offset))
        return create_date.isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    # generate last activity date between create and close, clamped to today
    def last_activity_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        close_date = dateutil.parser.parse(column_values['CloseDate'])
        if close_date > today_datetime:
            close_date = today_datetime
        if create_date > today_datetime:
            create_date = today_datetime
        return fake.date_time_between_dates(create_date, close_date).date()

    data_gen.add_formula_column('LastActivityDate__c', formula=last_activity_date_formula)

    # generate StageName, ForecastCategory, and Probability (None key = not Won)
    data_gen.add_map_column('StageName', 'Opportunity Result', value_map={
        'Won': 'Closed Won',
        None: 'Closed Lost'
    })
    data_gen.add_map_column('ForecastCategory', 'Opportunity Result', value_map={
        'Won': 'Closed',
        None: 'Omitted'
    })
    data_gen.add_map_column('ForecastCategoryName', 'Opportunity Result', value_map={
        'Won': 'Closed',
        None: 'Omitted'
    })
    data_gen.add_map_column('Probability', 'Opportunity Result', value_map={
        'Won': '100',
        None: '0'
    })

    # randomly pick an owner from the same region
    region_territory_map = {
        'Pacific': lambda: 'W_Sales_User.' + str(choice([1, 2, 3, 4, 5, 6])),
        "Northwest": lambda: 'W_Sales_User.' + str(choice([1, 2, 3, 4, 5, 6])),
        "Midwest": lambda: 'W_Sales_User.' + str(choice([7, 8, 9, 10, 11])),
        "Southwest": lambda: 'W_Sales_User.' + str(choice([7, 8, 9, 10, 11])),
        "Mid-Atlantic": lambda: 'W_Sales_User.' + str(choice([7, 8, 9, 10, 11])),
        "Northeast": lambda: 'W_Sales_User.' + str(choice([12, 13, 14, 15, 16, 17])),
        "Southeast": lambda: 'W_Sales_User.' + str(choice([12, 13, 14, 15, 16, 17]))
    }
    data_gen.add_map_column('Owner.External_Id__c', 'Region__c', region_territory_map)

    # build out helper column for account selection ("rev.employees.bookings")
    def account_cat_formula(column_values):
        x1 = column_values['Client Size By Revenue']
        x2 = column_values['Client Size By Employee Count']
        x3 = column_values['Revenue From Client Past Two Years']
        return str(x1) + '.' + str(x2) + '.' + str(x3)

    data_gen.add_formula_column('account_cat', account_cat_formula)

    # apply pending transformations now so we can sort by account_cat
    data_gen.apply_transformations()
    data_gen.sort_by('account_cat')

    # helper dataset used for account selection (mutable state carried
    # between invocations of account_id_formula below)
    data_gen.add_dataset('account_segment', {
        'account_id': 0,
        'account_count': 0,
        'current_account_cat': None
    })

    # generate a distribution of account ids: consecutive rows with the same
    # account_cat share an account until its random quota is used up
    def account_id_formula(column_values):
        account_segment = data_gen.datasets['account_segment']
        account_id = account_segment['account_id']
        account_count = account_segment['account_count']
        current_account_cat = account_segment['current_account_cat']
        if column_values[
                'account_cat'] == current_account_cat and account_count > 0:
            # continue with the current account_id if there are still any to take
            # but first decrement account count
            account_count += -1
            account_segment['account_count'] = account_count
            return account_id
        else:
            # use new account id
            account_id += 1
            # generate a random number of opportunties to associate to an account
            account_count = int(round(lognormal(1))) + randint(1, 7)
            current_account_cat = column_values['account_cat']
            # update account segment dataset for next iteration
            account_count += -1
            account_segment['account_id'] = account_id
            account_segment['account_count'] = account_count
            account_segment['current_account_cat'] = current_account_cat
            return account_id

    data_gen.add_formula_column('AccountId__c', account_id_formula)

    # generate account id string
    data_gen.add_formula_column(
        'AccountExternalId__c',
        formula=lambda cv: 'W_Account.' + str(cv['AccountId__c']))

    # generate account name string (memoized so one account keeps one name)
    account_names = {}

    def account_name_formula(column_values):
        account_id = column_values['AccountId__c']
        if account_id in account_names:
            return account_names[account_id]
        else:
            account_name = account.account_name()
            account_names[account_id] = account_name
            return account_name

    data_gen.add_formula_column('AccountName__c', formula=account_name_formula)

    # generate name: "<account name> <row mod 256>"
    def name_formula(column_values):
        account_name = column_values['AccountName__c']
        # NOTE(review): amount and product_2_name are read but unused
        amount = column_values['Amount']
        product_2_name = column_values['Product2Name__c']
        return account_name + ' ' + str(data_gen.current_row % 256)

    data_gen.add_formula_column('Name', name_formula)

    # apply remaining transformations
    data_gen.apply_transformations()

    # sort by account id
    data_gen.sort_by('AccountId__c')

    columns_to_write = [
        'External_Id__c', 'Product2Name__c', 'Product2Family__c', 'Region__c',
        'LeadSource', 'TimeToClose__c', 'SalesStageCount__c', 'Amount',
        'AccountAnnualRevenue__c', 'AccountNumberOfEmployees__c',
        'AccountBookings__c', 'Competitor__c', 'DealSizeCategory__c',
        'AccountExternalId__c', 'AccountName__c', 'close_date_year',
        'close_date_quarter', 'close_date_offset__c', 'Exec_Meeting__c',
        'Interactive_Demo__c', 'IsWon', 'IsClosed', 'Owner.External_Id__c',
        'Name', 'Type', 'StageName', 'ForecastCategory',
        'ForecastCategoryName', 'Probability', 'RecordType.DeveloperName'
    ]
    data_gen.write(output_file_name, columns_to_write)
def run(batch_id, source_file_name, product_output_file_name, pricebook_output_file_name):
    """Generate the Product2 file (unique product name/family pairs) and the
    PricebookEntry file (per-product unit prices derived from average
    opportunity amounts) from the opportunity shape CSV.

    batch_id                  -- tag written as analyticsdemo_batch_id__c
    source_file_name          -- opportunity shape CSV
    product_output_file_name  -- destination for Product2 rows
    pricebook_output_file_name-- destination for PricebookEntry rows

    Fixes vs. previous version: unit_price_formula no longer raises
    ZeroDivisionError when a product has no positive amounts, and parses
    amounts with float() so decimal strings don't raise ValueError.
    """
    data_gen = DataGenerator()

    # load source file
    source_columns = ['Product2Name__c', 'Product2Family__c']
    data_gen.load_source_file(source_file_name, source_columns)

    # rename columns
    data_gen.rename_column('Product2Name__c', 'Name')
    data_gen.rename_column('Product2Family__c', 'Family')

    # filter out duplicate data
    data_gen.unique()

    # generate product code
    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Product.' + str(data_gen.current_row + 1))
    data_gen.add_copy_column('ProductCode', 'External_Id__c')

    # apply transformations and write Product2 file
    data_gen.apply_transformations()
    data_gen.write(product_output_file_name)

    # generate pricebook entry code
    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_PricebookEntry.' + str(data_gen.current_row + 1))

    # generate product id reference
    data_gen.add_copy_column('Product2.External_Id__c', 'ProductCode')

    # get map of product names to opportunity amounts
    shape_dataset = data_gen.load_dataset('shape', source_file_name,
                                          ['Product2Name__c', 'Amount'])
    amounts_by_product_name = shape_dataset.group_by('Product2Name__c')

    # generate unit price
    def unit_price_formula(column_values):
        # average the positive opportunity amounts for this product
        product_name = column_values['Name']
        amounts = amounts_by_product_name[product_name]
        total_amount = 0
        count = 0
        for amount in amounts:
            # float() accepts both integer and decimal amount strings
            amount = float(amount['Amount'])
            if amount > 0:
                count += 1
                total_amount += amount
        # guard: a product with no positive amounts previously divided by zero
        if count == 0:
            return 1
        avg_amount = total_amount / count
        # spread prices by pretending a random quantity per opportunity
        random_quantity = randint(1, 100)
        return int(avg_amount / random_quantity)

    data_gen.add_formula_column('UnitPrice', formula=unit_price_formula)

    data_gen.add_constant_column('IsActive', 'true')
    data_gen.add_constant_column('Pricebook2.Name', 'Standard Price Book')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write PricebookEntry file
    data_gen.apply_transformations()
    data_gen.write(pricebook_output_file_name, [
        'External_Id__c',
        'Product2.External_Id__c',
        'IsActive',
        'Pricebook2.Name',
        'UnitPrice',
        'analyticsdemo_batch_id__c'
    ])
def run(batch_id, source_file_name, output_file_name, manager_output_file_name):
    """Generate service User records plus a manager-assignment file.

    Loads unique (owner id, team) pairs, appends three synthetic CSM manager
    rows, fills in faker-generated identity fields and a large set of constant
    permission/preference flags, writes the full user file, then writes a
    second file mapping each CSR user to its regional manager.

    :param batch_id: UUID stamped on every row of this batch
    :param source_file_name: CSV with Owner.External_Id__c / Team__c
    :param output_file_name: destination for the full user rows
    :param manager_output_file_name: destination for user -> manager mapping
    """
    data_gen = DataGenerator()

    # load source file
    source_columns = ['Owner.External_Id__c', 'Team__c']
    data_gen.load_source_file(source_file_name, source_columns)
    data_gen.unique()

    # rename columns
    data_gen.rename_column('Owner.External_Id__c', 'External_Id__c')
    data_gen.rename_column('Team__c', 'UserRole.Name')

    # add 3 manager users
    # NOTE(review): these are appended below as raw [External_Id__c,
    # UserRole.Name] lists, which must match the two-column row layout
    # established by the renames above.
    west_manager = ['W_User.M.' + str(len(data_gen.rows) + 1), 'West CSM']
    east_manager = ['W_User.M.' + str(len(data_gen.rows) + 2), 'East CSM']
    central_manager = ['W_User.M.' + str(len(data_gen.rows) + 3), 'Central CSM']

    ## managers from Sales ##
    # west_manager = ['RVP West', 'W_Sales_User.M.' + str(len(data_gen.rows) + 1)]
    # east_manager = ['RVP East', 'W_Sales_User.M.' + str(len(data_gen.rows) + 2)]
    # central_manager = ['RVP Central', 'W_Sales_User.M.' + str(len(data_gen.rows) + 3)]
    ########################

    data_gen.rows.append(west_manager)
    data_gen.rows.append(east_manager)
    data_gen.rows.append(central_manager)

    # generate company name
    data_gen.add_formula_column('CompanyName', formula=fake.company)

    # generate fake first and last name
    def first_name_formula(column_values):
        # gender is keyed off the numeric suffix of the external id:
        # ids below 13 get female names, the rest male
        id = int(column_values['External_Id__c'].split('.')[-1])  # shadows builtin `id` (local only)
        return fake.first_name_female() if id < 13 else fake.first_name_male()

    data_gen.add_formula_column('FirstName', formula=first_name_formula)
    data_gen.add_formula_column('LastName', formula=fake.last_name)

    # generate data based on fake first and last name
    data_gen.add_formula_column('Name', lambda cv: cv['FirstName'] + ' ' + cv['LastName'])

    # generate data based on fake first and last name
    def alias_formula(column_values):
        # first initial + last name, lower-cased, capped at 8 characters
        alias = (column_values['FirstName'][0] + column_values['LastName']).lower()
        trimmed_alias = alias[:8] if len(alias) > 8 else alias
        return trimmed_alias

    data_gen.add_formula_column('Alias', formula=alias_formula)
    data_gen.add_formula_column('Username', lambda cv: cv['Alias'] + '@demo.user')
    data_gen.add_formula_column('CommunityNickname', lambda cv: cv['Alias'] + str(randint(100, 999)))
    data_gen.add_formula_column('Email', lambda cv: cv['Alias'] + '@webmail.com')
    data_gen.add_formula_column('Phone', formula=fake.phone_number)

    # 70/30 split between junior and senior reps
    titles = ['Customer Service Representative', 'Senior Customer Service Representative']
    data_gen.add_formula_column('Title', lambda: choice(titles, p=[.70, .30]))

    # generate constant values
    data_gen.add_constant_column('IsActive', 'false')
    data_gen.add_constant_column('TimeZoneSidKey', 'America/Los_Angeles')
    data_gen.add_constant_column('Profile.Name', 'Standard User')
    # from oppty> data_gen.add_constant_column('Profile.Name', 'Standard User')
    data_gen.add_constant_column('LocaleSidKey', 'en_US')
    data_gen.add_constant_column('LanguageLocaleKey', 'en_US')
    data_gen.add_constant_column('EmailEncodingKey', 'ISO-8859-1')
    data_gen.add_constant_column('ForecastEnabled', 'true')  # this comes from Sales

    # standard-user permission flags, all disabled for demo users
    data_gen.add_constant_column('UserPermissionsAvantgoUser', 'false')
    data_gen.add_constant_column('UserPermissionsCallCenterAutoLogin', 'false')
    data_gen.add_constant_column('UserPermissionsChatterAnswersUser', 'false')
    data_gen.add_constant_column('UserPermissionsInteractionUser', 'false')
    data_gen.add_constant_column('UserPermissionsJigsawProspectingUser', 'false')
    data_gen.add_constant_column('UserPermissionsKnowledgeUser', 'false')
    data_gen.add_constant_column('UserPermissionsLiveAgentUser', 'false')
    data_gen.add_constant_column('UserPermissionsMarketingUser', 'false')
    data_gen.add_constant_column('UserPermissionsMobileUser', 'false')
    data_gen.add_constant_column('UserPermissionsOfflineUser', 'false')
    data_gen.add_constant_column('UserPermissionsSFContentUser', 'false')
    data_gen.add_constant_column('UserPermissionsSiteforceContributorUser', 'false')
    data_gen.add_constant_column('UserPermissionsSiteforcePublisherUser', 'false')
    data_gen.add_constant_column('UserPermissionsSupportUser', 'false')
    data_gen.add_constant_column('UserPermissionsWorkDotComUserFeature', 'false')

    # user preference flags, all disabled for demo users
    data_gen.add_constant_column('UserPreferencesActivityRemindersPopup', 'false')
    data_gen.add_constant_column('UserPreferencesApexPagesDeveloperMode', 'false')
    data_gen.add_constant_column('UserPreferencesCacheDiagnostics', 'false')
    data_gen.add_constant_column('UserPreferencesContentEmailAsAndWhen', 'false')
    data_gen.add_constant_column('UserPreferencesContentNoEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableAllFeedsEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableBookmarkEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableChangeCommentEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableEndorsementEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableFeedbackEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableFileShareNotificationsForApi', 'false')
    data_gen.add_constant_column('UserPreferencesDisableFollowersEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableLaterCommentEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableLikeEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableMentionsPostEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableMessageEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableProfilePostEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableRewardEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableSharePostEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableWorkEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisCommentAfterLikeEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisMentionsCommentEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisProfPostCommentEmail', 'false')
    data_gen.add_constant_column('UserPreferencesEnableAutoSubForFeeds', 'false')
    data_gen.add_constant_column('UserPreferencesEventRemindersCheckboxDefault', 'false')
    data_gen.add_constant_column('UserPreferencesHideBiggerPhotoCallout', 'false')
    data_gen.add_constant_column('UserPreferencesHideChatterOnboardingSplash', 'false')
    data_gen.add_constant_column('UserPreferencesHideCSNDesktopTask', 'false')
    data_gen.add_constant_column('UserPreferencesHideCSNGetChatterMobileTask', 'false')
    data_gen.add_constant_column('UserPreferencesHideEndUserOnboardingAssistantModal', 'false')
    data_gen.add_constant_column('UserPreferencesHideLightningMigrationModal', 'false')
    data_gen.add_constant_column('UserPreferencesHideS1BrowserUI', 'false')
    data_gen.add_constant_column('UserPreferencesHideSecondChatterOnboardingSplash', 'false')
    data_gen.add_constant_column('UserPreferencesHideSfxWelcomeMat', 'false')
    data_gen.add_constant_column('UserPreferencesJigsawListUser', 'false')
    data_gen.add_constant_column('UserPreferencesLightningExperiencePreferred', 'false')
    data_gen.add_constant_column('UserPreferencesPathAssistantCollapsed', 'false')
    data_gen.add_constant_column('UserPreferencesPreviewLightning', 'false')
    data_gen.add_constant_column('UserPreferencesReminderSoundOff', 'false')
    data_gen.add_constant_column('UserPreferencesShowCityToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowCityToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowCountryToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowCountryToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowEmailToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowEmailToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowFaxToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowFaxToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowManagerToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowManagerToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowMobilePhoneToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowMobilePhoneToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowPostalCodeToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowPostalCodeToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowProfilePicToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowStateToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowStateToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowStreetAddressToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowStreetAddressToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowTitleToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowTitleToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowWorkPhoneToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowWorkPhoneToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesSortFeedByComment', 'false')
    data_gen.add_constant_column('UserPreferencesTaskRemindersCheckboxDefault', 'false')
    data_gen.add_constant_column('EmailPreferencesAutoBcc', 'false')
    data_gen.add_constant_column('EmailPreferencesAutoBccStayInTouch', 'false')
    data_gen.add_constant_column('EmailPreferencesStayInTouchReminder', 'false')
    data_gen.add_constant_column('UserPreferencesGlobalNavBarWTShown', 'false')
    data_gen.add_constant_column('UserPreferencesGlobalNavGridMenuWTShown', 'false')
    data_gen.add_constant_column('UserPreferencesCreateLEXAppsWTShown', 'false')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()
    data_gen.write(output_file_name)

    # create manager file
    # drop the three manager rows themselves; only CSRs get a manager mapping
    data_gen.filter(lambda cv: 'CSM' not in cv['UserRole.Name'])
    manager_map = {
        'West CSR': west_manager[0],
        'East CSR': east_manager[0],
        'Central CSR': central_manager[0]
    }

    ### this is the manager file section in Sales> ###
    # # create manager file
    # data_gen.filter(lambda cv: 'RVP' not in cv['UserRole.Name'])
    # manager_map = {
    #     'West Sales': west_manager[1],
    #     'East Sales': east_manager[1],
    #     'Central Sales': central_manager[1],
    # }
    ##################################################

    data_gen.add_map_column('Manager.External_Id__c', 'UserRole.Name', manager_map)
    data_gen.apply_transformations()
    data_gen.write(manager_output_file_name, ['External_Id__c', 'Manager.External_Id__c'])
def _status_history_row(count, case_id, created_by, event_date, old_value,
                        new_value, batch_id):
    """Build one synthetic Status-change CaseHistory row as a column-value dict.

    `event_date` is a datetime; it is serialized with isoformat(sep=' '),
    which matches str(datetime) and keeps all CreatedDate__c values strings.
    """
    return {
        'External_Id__c': 'W_CaseHistory.' + str(count),
        'Case.External_Id__c': case_id,
        'CreatedById__c': created_by,
        'CreatedDate__c': event_date.isoformat(sep=' '),
        'Field__c': 'Status',
        'OldValue__c': old_value,
        'NewValue__c': new_value,
        'ClosedDate__c': '',
        'First_Contact_Close__c': '',
        # every generated row carries the batch id; previously only the
        # intermediate-status branch set it, leaving the 'New' and closing
        # rows without a value for this output column
        'analyticsdemo_batch_id__c': batch_id
    }


def run(batch_id, source_file_name, output_file_name,
        reference_datetime=today_datetime):
    """Generate CaseHistory rows tracing each case's Status lifecycle.

    For every source case this emits: the original 'created' row, a Status
    change to 'New', then a random walk of intermediate Status values (one
    every 0-30 days) ending on the case's closed date with its final Status.
    First-contact-close cases that ended 'Closed' jump straight to Closed.

    :param batch_id: UUID stamped on every row of this batch
    :param source_file_name: CSV of cases with created/closed dates and status
    :param output_file_name: destination for CaseHistory rows
    :param reference_datetime: upper bound for all generated timestamps
    """
    case_status = ['Escalated', 'Waiting on Customer', 'On Hold', 'Working']

    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c',
        'ClosedDate__c', 'First_Contact_Close__c', 'Status'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    data_gen.rename_column('Owner.External_Id__c', 'CreatedById__c')

    # placeholder columns; real values are filled in per-row below
    data_gen.add_formula_column('External_Id__c', '')
    data_gen.add_constant_column('Field__c', 'created')
    data_gen.add_constant_column('OldValue__c', '')
    data_gen.add_constant_column('NewValue__c', '')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    current_count = 1
    new_rows = []
    row_count = len(data_gen.rows)
    for i in range(row_count):
        # consume source rows back-to-front; order is restored by reverse()
        row = data_gen.rows.pop()
        column_values = data_gen.row_to_column_values(row)

        column_values['External_Id__c'] = 'W_CaseHistory.' + str(current_count)
        current_count += 1

        case_id = column_values['Case.External_Id__c']
        created_by = column_values['CreatedById__c']
        created_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        closed_date = dateutil.parser.parse(column_values['ClosedDate__c'])
        # never generate history beyond the reference time
        if closed_date > reference_datetime:
            closed_date = reference_datetime
        first_contact_close = column_values['First_Contact_Close__c']
        status = column_values['Status']

        # include initial created row
        new_rows.append(data_gen.column_values_to_row(column_values))

        # include new status
        new_rows.append(data_gen.column_values_to_row(
            _status_history_row(current_count, case_id, created_by,
                                created_date, '', 'New', batch_id)))
        current_count += 1

        old_value = 'New'
        next_event_date = created_date
        while next_event_date <= closed_date:
            next_event_date = next_event_date + timedelta(days=randint(0, 30))

            if first_contact_close == 'true' and status == 'Closed':
                # first-contact-close cases jump straight to Closed on the
                # close date
                new_rows.append(data_gen.column_values_to_row(
                    _status_history_row(current_count, case_id, created_by,
                                        closed_date, old_value, 'Closed',
                                        batch_id)))
                current_count += 1
                break
            elif next_event_date >= closed_date:
                # final transition lands exactly on the close date with the
                # case's real final status
                new_rows.append(data_gen.column_values_to_row(
                    _status_history_row(current_count, case_id, created_by,
                                        closed_date, old_value, status,
                                        batch_id)))
                current_count += 1
                break
            else:
                # random intermediate status, never repeating the previous one
                new_value = case_status[randint(0, len(case_status) - 1)]
                while old_value == new_value:
                    new_value = case_status[randint(0, len(case_status) - 1)]
                new_rows.append(data_gen.column_values_to_row(
                    _status_history_row(current_count, case_id, created_by,
                                        next_event_date, old_value, new_value,
                                        batch_id)))
                old_value = new_value
                current_count += 1

    # rows were popped back-to-front; restore the original ordering
    data_gen.rows = new_rows
    data_gen.reverse()

    output_columns = [
        'External_Id__c', 'Case.External_Id__c', 'CreatedById__c',
        'CreatedDate__c', 'Field__c', 'OldValue__c', 'NewValue__c',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def run(batch_id, source_file_name, output_file_name, shape_file_name):
    """Build synthetic W_Services_Opportunity rows from account shape data.

    Loads unique account/owner pairs, attaches randomized stage, type,
    product, and amount fields, then derives created/close/last-activity
    dates from the per-account rows in the shape file.

    :param batch_id: UUID stamped on every row of this batch
    :param source_file_name: CSV with External_Id__c / Owner.External_Id__c
    :param output_file_name: destination for opportunity rows
    :param shape_file_name: CSV with per-account created/last-activity dates
    """
    gen = DataGenerator()

    # unique (account, owner) pairs form the opportunity skeleton
    gen.load_source_file(source_file_name,
                         ['External_Id__c', 'Owner.External_Id__c'])
    gen.unique()
    gen.rename_column('External_Id__c', 'Account.External_Id__c')

    # sequential synthetic opportunity id
    gen.add_formula_column(
        'External_Id__c',
        lambda: 'W_Services_Opportunity.' + str(gen.current_row + 1))

    # weighted random pipeline stage
    stages = [
        'Qualification', 'Needs Analysis', 'Proposal/Quote', 'Negotiation',
        'Closed Won', 'Closed Lost'
    ]
    gen.add_formula_column(
        'StageName', lambda: choice(stages, p=[.25, .20, .15, .10, .15, .15]))

    # weighted random opportunity type
    opportunity_types = ['New Business', 'Add-On Business', 'Services', 'Renewal']
    gen.add_formula_column(
        'Type', lambda: choice(opportunity_types, p=[.45, .27, .18, .10]))

    # uniform random product pick
    products = [
        "GC20002", "GC5000 series", "GC10001", "GC50000", "GC1000 series"
    ]
    gen.add_formula_column('Products__c', products)

    # amount drawn from a normal distribution, in round thousands
    gen.add_formula_column('Amount', lambda: 1000 * int(normal(1400, 350)))

    gen.add_formula_column(
        'Name',
        lambda cv: 'New Opportunity [' + str(gen.current_row + 1) + ']')

    # shape rows grouped per account drive the date fields below
    shape_dataset = gen.load_dataset(
        'shape', shape_file_name,
        ['Account.External_Id__c', 'CreatedDate__c', 'LastActivityDate__c'])
    accounts_by_id = shape_dataset.group_by('Account.External_Id__c')

    def created_date(cv):
        # 1-45 days before the account's earliest shape created date
        shapes = accounts_by_id.get(cv['Account.External_Id__c'])
        earliest = min(
            dateutil.parser.parse(shape['CreatedDate__c']) for shape in shapes)
        return (earliest - timedelta(days=randint(1, 45))).isoformat(sep=' ')

    gen.add_formula_column('DateTimeCreated__c', created_date)

    def latest_activity(cv):
        # most recent shape activity date for the account
        shapes = accounts_by_id.get(cv['Account.External_Id__c'])
        newest = max(
            dateutil.parser.parse(shape['LastActivityDate__c'])
            for shape in shapes)
        return newest.isoformat(sep=' ')

    gen.add_formula_column('LastActivityDate__c', latest_activity)

    gen.add_copy_column('CloseDate', 'DateTimeCreated__c')

    # add a UUID for each row that is created in this batch
    gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    gen.apply_transformations()
    gen.write(output_file_name, [
        'External_Id__c', 'Owner.External_Id__c', 'Account.External_Id__c',
        'DateTimeCreated__c', 'CloseDate', 'LastActivityDate__c', 'Name',
        'Products__c', 'StageName', 'Amount', 'Type',
        'analyticsdemo_batch_id__c'
    ])
def run(batch_id, source_file_name, output_file_name, reference_datetime=today,
        id_offset=0):
    """Generate W_Event rows (1-3 per opportunity) from opportunity data.

    The first event generated overall lands exactly on its opportunity's last
    activity date; every later event is placed randomly between the
    opportunity's created date and its last activity date. No event timestamp
    exceeds *reference_datetime*.

    :param batch_id: UUID stamped on every row of this batch
    :param source_file_name: CSV of opportunities with created/last-activity dates
    :param output_file_name: destination for event rows
    :param reference_datetime: upper bound for event created dates
    :param id_offset: starting offset for the sequential W_Event ids
    """
    gen = DataGenerator()

    gen.load_source_file(source_file_name, [
        'External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c',
        'LastActivityDate__c'
    ])
    gen.rename_column('External_Id__c', 'What.External_Id__c')
    gen.rename_column('LastActivityDate__c', 'ActivityDate')

    # fan each opportunity out into a random number of events
    gen.duplicate_rows(duplication_factor=lambda: randint(1, 3))

    gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Event.' + str(id_offset + gen.current_row + 1))
    gen.add_formula_column('Subject', formula=event.event_subject)
    gen.add_formula_column('EventSubtype', formula=event.event_subtype)
    gen.add_formula_column('DurationInMinutes', formula=event.event_call_duration)

    first_event = True

    def created_date(cv):
        nonlocal first_event
        start = dateutil.parser.parse(cv['CreatedDate__c'])
        end = dateutil.parser.parse(cv['ActivityDate'])
        # the first row evaluated pins its event to the activity date itself;
        # all later rows draw a random moment inside the opportunity window
        stamp = end if first_event else fake.date_time_between_dates(start, end)
        first_event = False
        # clamp so no event is created in the future
        return min(stamp, reference_datetime).isoformat(sep=' ')

    gen.add_formula_column('CreatedDate__c', created_date)

    def activity_date(cv):
        # the event's activity happens 0-14 days after it is created
        base = dateutil.parser.parse(cv['CreatedDate__c']).date()
        return (base + timedelta(days=randint(0, 14))).isoformat()

    gen.add_formula_column('ActivityDate', activity_date)

    gen.add_formula_column(
        'ActivityDateTime',
        lambda cv: dateutil.parser.parse(cv['ActivityDate']))

    # add a UUID for each row that is created in this batch
    gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write
    gen.apply_transformations()
    gen.write(output_file_name)
def run(batch_id, source_file_name, output_file_name, reference_datetime=today):
    """Generate W_AgentWork rows for chat-origin cases from the last 60 days.

    Each surviving case fans out into 1-5 work items whose
    request/assign/accept/close timestamps form a plausible timeline, plus
    randomized capacity/queue/status attributes. Returns the result of
    DataGenerator.write (called with an extra 6000 argument -- presumably a
    per-file row limit; confirm against DataGenerator.write).

    :param batch_id: UUID stamped on every row of this batch
    :param source_file_name: CSV of cases (all columns loaded)
    :param output_file_name: destination for AgentWork rows
    :param reference_datetime: upper bound for generated timestamps
    """
    data_gen = DataGenerator()

    # load source file (no column list: keeps every source column, including
    # Origin and ClosedDate__c which are used further down)
    data_gen.load_source_file(source_file_name)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    data_gen.rename_column('Owner.External_Id__c', 'User.External_Id__c')

    # most cases produce a single work item; a long tail produce up to five
    data_gen.duplicate_rows(duplication_factor=lambda: choice(
        [1, 2, 3, 4, 5], p=[.65, .15, .10, .05, .05]))

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_AgentWork.' + str(data_gen.current_row + 1))

    # work is requested the moment the case is created
    data_gen.add_copy_column('RequestDateTime__c', 'CreatedDate__c')

    def created_date_formula(column_values):
        # re-randomize the work's created date into the first half of the
        # case's open interval (closed date capped at the reference time)
        created_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        closed_date = dateutil.parser.parse(column_values['ClosedDate__c'])
        if closed_date > reference_datetime:
            closed_date = reference_datetime
        mid_date = created_date + (closed_date - created_date) / 2
        return fake.date_time_between_dates(created_date,
                                            mid_date).isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', created_date_formula)

    def assigned_date_formula(column_values):
        # assigned within two minutes of creation
        created_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        return (created_date +
                timedelta(seconds=randint(0, 120))).isoformat(sep=' ')

    data_gen.add_formula_column('AssignedDateTime__c', assigned_date_formula)

    def accept_date_formula(column_values):
        # accepted 30s-10min after assignment
        assigned_date = dateutil.parser.parse(
            column_values['AssignedDateTime__c'])
        return (assigned_date +
                timedelta(seconds=randint(30, 600))).isoformat(sep=' ')

    data_gen.add_formula_column('AcceptDateTime__c', accept_date_formula)

    def close_date_formula(column_values):
        # closed 30s-30min after acceptance
        accept_date = dateutil.parser.parse(column_values['AcceptDateTime__c'])
        return (accept_date +
                timedelta(seconds=randint(30, 1800))).isoformat(sep=' ')

    data_gen.add_formula_column('CloseDateTime__c', close_date_formula)

    def active_time_formula(column_values):
        # whole seconds between accept and close
        accept_date = dateutil.parser.parse(column_values['AcceptDateTime__c'])
        close_date = dateutil.parser.parse(column_values['CloseDateTime__c'])
        return int((close_date - accept_date).total_seconds())

    data_gen.add_formula_column('ActiveTime__c', active_time_formula)

    data_gen.add_formula_column('AgentCapacityWhenDeclined__c',
                                lambda: randint(30, 1800))

    def cancel_date_formula(column_values):
        # cancelled 30s-10min after assignment
        assigned_date = dateutil.parser.parse(
            column_values['AssignedDateTime__c'])
        return (assigned_date +
                timedelta(seconds=randint(30, 600))).isoformat(sep=' ')

    data_gen.add_formula_column('CancelDateTime__c', cancel_date_formula)

    # NOTE(review): randint(1, 101) is 1-100 with numpy's randint but 1-101
    # with stdlib random.randint -- confirm which module is imported here.
    data_gen.add_formula_column('CapacityPercentage__c',
                                lambda: randint(1, 101))
    data_gen.add_formula_column('CapacityWeight__c', lambda: randint(1, 7))

    def decline_date_formula(column_values):
        # declined 30s-10min after assignment
        assigned_date = dateutil.parser.parse(
            column_values['AssignedDateTime__c'])
        return (assigned_date +
                timedelta(seconds=randint(30, 600))).isoformat(sep=' ')

    data_gen.add_formula_column('DeclineDateTime__c', decline_date_formula)
    data_gen.add_formula_column('DeclineReason__c', formula=fake.sentence)
    data_gen.add_copy_column('HandleTime__c', 'ActiveTime__c')

    # uniform random queue assignment
    data_gen.add_formula_column('OriginalQueue.DeveloperName', [
        'GeneralQueue', 'InternationalQueue', 'Knowledge_Translations',
        'Social_Queue', 'TargetCampaign', 'Tier1Queue', 'Tier2Queue',
        'Tier3Queue'
    ])

    data_gen.add_formula_column('PushTimeout__c', lambda: randint(0, 100))

    def push_timeout_date_formula(column_values):
        # NOTE(review): returns a datetime object while the other date
        # formulas return isoformat strings; the writer presumably
        # stringifies it -- confirm the output formatting is consistent.
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        return create_date + timedelta(seconds=column_values['PushTimeout__c'])

    data_gen.add_formula_column('PushTimeoutDateTime__c',
                                push_timeout_date_formula)

    data_gen.add_formula_column(
        'ServiceChannel.DeveloperName',
        ['Cases', 'LiveMessage', 'sfdc_liveagent', 'Leads'])

    def speed_to_answer_formula(column_values):
        # whole seconds between the original request and acceptance
        request_date = dateutil.parser.parse(
            column_values['RequestDateTime__c'])
        accept_date = dateutil.parser.parse(column_values['AcceptDateTime__c'])
        return int((accept_date - request_date).total_seconds())

    data_gen.add_formula_column('SpeedToAnswer__c', speed_to_answer_formula)

    data_gen.add_formula_column('Status__c', [
        'Assigned', 'Unavailable', 'Declined', 'Opened', 'Closed',
        'DeclinedOnPushTimeout', 'Canceled'
    ])

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    def filter_func(column_values):
        # keep only chat cases created within 60 days of the reference time
        created_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        cutoff_date = reference_datetime - timedelta(days=60)
        return column_values['Origin'] == 'Chat' and created_date >= cutoff_date

    data_gen.filter(filter_function=filter_func)

    data_gen.apply_transformations()
    data_gen.sort_by('RequestDateTime__c')

    output_columns = [
        'External_Id__c', 'RequestDateTime__c', 'CreatedDate__c',
        'AssignedDateTime__c', 'AcceptDateTime__c', 'CloseDateTime__c',
        'ActiveTime__c', 'AgentCapacityWhenDeclined__c', 'CancelDateTime__c',
        'CapacityPercentage__c', 'CapacityWeight__c', 'DeclineDateTime__c',
        'DeclineReason__c', 'HandleTime__c', 'OriginalQueue.DeveloperName',
        'PushTimeout__c', 'PushTimeoutDateTime__c',
        'ServiceChannel.DeveloperName', 'SpeedToAnswer__c', 'Status__c',
        'User.External_Id__c', 'Case.External_Id__c',
        'analyticsdemo_batch_id__c'
    ]
    return data_gen.write(output_file_name, output_columns, 6000)
def run(source_file_name, output_file_name):
    """Shape raw retail rows into synthetic service-case columns.

    Drops profit outliers (>2 standard deviations), then derives case fields
    (tier, origin, support level, reason, priority, SLA, status, owner, CSAT,
    account id, etc.) from the retail columns via map and formula columns,
    and writes the reshaped rows.

    :param source_file_name: CSV of retail rows (Profit, Store, Month, ...)
    :param output_file_name: destination for the shaped rows
    """
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    # find mean and std of profit
    profits = []
    for row in data_gen.rows:
        column_values = data_gen.row_to_column_values(row)
        profits.append(float(column_values['Profit']))
    profit_mean = mean(profits)
    profit_std = std(profits)

    # filter out profits more than 2 std out.
    def filter_func(column_values):
        profit = float(column_values['Profit'])
        z_score = abs((profit - profit_mean) / profit_std)
        return z_score <= 2

    data_gen.filter(filter_function=filter_func)

    # store name -> service tier
    store_tier_map = {
        'New York 4': "Tier 1",
        'New York 3': "Tier 1",
        'New York 2': "Tier 1",
        'New York 1': "Tier 1",
        'Chicago 3': "Tier 1",
        'Chicago 2': "Tier 2",
        'Chicago 1': "Tier 2",
        'Boston 2': "Tier 2",
        'Boston 1': "Tier 3"
    }
    data_gen.add_map_column('Tier', 'Store', store_tier_map)

    # month -> case channel
    month_channel_map = {
        'January': 'Chat',
        'February': 'Chat',
        'March': 'Chat',
        'April': 'Chat',
        'May': 'Chat',
        'June': 'Email',
        'July': 'Email',
        'August': 'Facebook',
        'September': 'Phone',
        'October': 'Phone',
        'November': 'Website',
        'December': 'Website'
    }
    data_gen.add_map_column('Origin', 'Month', month_channel_map)

    # discount rate (as string) -> support plan
    discount_support_map = {
        '0': 'Free',
        '0.05': 'Free',
        '0.15': 'Basic',
        '0.1': 'Silver',
        '0.2': 'Platinum'
    }
    data_gen.add_map_column('Type_of_Support__c', 'Discount',
                            discount_support_map)

    # marketing campaign -> case reason
    camp_reason_map = {
        "Bundled": "Documentation",
        "Buy More & Save": "Unknown Failure",
        "Competitor Focus": "Feature Question",
        "Door Buster": "Hardware Question",
        "Friends & Family": "Late Delivery",
        "Local": "Software Question",
        "Paper Circular": "General Question",
        "Regional": "Item Damaged",
        "Social": "Item Damaged"
    }
    data_gen.add_map_column('Reason', 'Marketing Campaign', camp_reason_map)

    # city -> case priority
    city_priority_map = {
        "Boston": "Low",
        "Chicago": "Medium",
        "New York": "High"
    }
    data_gen.add_map_column('Priority', 'City', city_priority_map)

    # competition level -> SLA compliance
    comp_sla_map = {
        "High": "Violation",
        "Normal": "Compliant",
        "Low": "Compliant"
    }
    data_gen.add_map_column('SLA', 'Competition', comp_sla_map)

    data_gen.add_constant_column('Status', 'Closed')

    # SLA -> weighted chance of a first-contact close (callable map values)
    sla_first_contact_close_map = {
        'Compliant': lambda: choice(['true', 'false'], p=[.9, .1]),
        'Violation': lambda: choice(['true', 'false'], p=[.7, .3])
    }
    data_gen.add_map_column('First_Contact_Close__c', 'SLA',
                            sla_first_contact_close_map)

    # SLA -> weighted hours-open distribution
    sla_time_open_map = {
        'Compliant': lambda: choice([12, 24, 36, 48], p=[.50, .20, .20, .10]),
        'Violation': lambda: choice([60, 72, 84, 96, 108, 120],
                                    p=[.60, .20, .10, .05, .03, .02])
    }
    data_gen.add_map_column('Time_Open__c', 'SLA', sla_time_open_map)

    def region_formula(column_values):
        # customer average age buckets the case into a regional CSR team
        average_age = float(column_values['Average Age'])
        if average_age < 40:
            return 'West CSR'
        elif average_age >= 40.0 and average_age < 50:
            return 'Central CSR'
        else:
            return 'East CSR'

    data_gen.add_formula_column('Team__c', region_formula)

    def user_formula(column_values):
        # same age buckets pick an owner from that region's user id range
        average_age = float(column_values['Average Age'])
        if average_age < 40:
            return 'W_Services_User.' + str(choice([1, 2, 3, 4, 5]))
        elif average_age >= 40.0 and average_age < 50:
            return 'W_Services_User.' + str(choice([6, 7, 8, 9, 10, 11]))
        else:
            return 'W_Services_User.' + str(choice([12, 13, 14, 15, 16, 17]))

    data_gen.add_formula_column('Owner.External_Id__c', user_formula)

    # generate offer voucher - give vouchers to customers that were unhappy with Video Games or Cables to boost CSAT
    def offer_voucher_formula(column_values):
        # NOTE(review): assumes 'Profit Linear' lies in [0, 100]; values
        # outside that range would make the probabilities invalid -- confirm.
        csat = float(column_values['Profit Linear'])
        item = column_values['Item']
        if item in ['Video Games', 'Cables']:
            return choice(['true', 'false'], p=[csat/100, (100 - csat) / 100])
        else:
            return 'false'

    data_gen.add_formula_column('Offer_Voucher__c', offer_voucher_formula)

    def send_field_service_formula(column_values):
        # high-CSAT tablet cases always get field service; others 25% chance
        csat = float(column_values['Profit Linear'])
        item = column_values['Item']
        if csat >= 80.0 and item == 'Tablet':
            return 'true'
        else:
            return choice(['true', 'false'], p=[.25, .75])

    data_gen.add_formula_column('Send_FieldService__c',
                                send_field_service_formula)

    # Tier 1 cases are escalated; the None key is the default for all others
    data_gen.add_map_column('IsEscalated', 'Tier',
                            {'Tier 1': 'true', None: 'false'})

    # generate close date offset
    # random offset covering the last 14 months
    data_gen.add_formula_column('close_date_offset',
                                lambda: randint(1, 30 * 14))

    # generate account id - generate a long tail distribution - cubic function +- randint
    # NOTE(review): the draw below is lognormal + randint, not cubic; the
    # comment above appears stale.
    # helper dataset used for account selection
    data_gen.add_dataset('current_account', {
        'account_id': 0,
        'account_count': 0
    })

    # generate a distribution of account ids
    def account_id_formula(column_values):
        # stateful: consecutive rows reuse the same account id until its
        # randomly drawn row budget (lognormal + uniform) is exhausted,
        # producing a long-tail cases-per-account distribution
        current_account = data_gen.datasets['current_account']
        account_id = current_account['account_id']
        account_count = current_account['account_count']
        if account_count > 0:
            # continue with the current account_id if there are still any to take
            # but first decrement account count
            account_count += -1
            current_account['account_count'] = account_count
        else:
            # use new account id
            account_id += 1
            account_count = int(round(lognormal(1))) + randint(1, 7)
            # update account dataset for next iteration
            account_count += -1
            current_account['account_count'] = account_count
            current_account['account_id'] = account_id
        return 'W_Services_Account.' + str(account_id)

    data_gen.add_formula_column('Account.External_Id__c', account_id_formula)

    def csat_formula(column_values):
        # first normalize csat between 30-90
        # NOTE(review): (70 * csat / 100) + 30 spans 30-100 for csat in
        # [0, 100], not 30-90 as the comment above says.
        csat = float(column_values['Profit Linear'])
        new_delta = 70
        csat = (new_delta * csat / 100) + 30
        channel = column_values['Origin']
        is_escalated = column_values['IsEscalated']
        send_field_service = column_values['Send_FieldService__c']
        offer_voucher = column_values['Offer_Voucher__c']
        # each intervention nudges CSAT up, except over the phone where it
        # nudges it down
        if is_escalated == 'true':
            if channel == 'Phone':
                csat = csat - 2
            else:
                csat = csat + 2
        if send_field_service == 'true':
            if channel == 'Phone':
                csat = csat - 2
            else:
                csat = csat + 4
        if offer_voucher == 'true':
            if channel == 'Phone':
                csat = csat - 2
            else:
                csat = csat + 4
        return csat

    data_gen.add_formula_column('CSAT__c', formula=csat_formula)

    # normalize the Outlier flag to lowercase true/false
    data_gen.add_map_column('Outlier', 'Outlier', value_map={
        'TRUE': 'true',
        None: 'false'
    })

    data_gen.apply_transformations()

    # first-contact-close cases spend zero hours open; others keep their value
    data_gen.add_map_column('Time_Open__c', 'First_Contact_Close__c',
                            value_map={
                                'true': 0,
                                None: lambda cv: cv['Time_Open__c']
                            })

    data_gen.apply_transformations()

    rename_map = {
        'Item': 'Product_Family_KB__c'
    }
    data_gen.rename_columns(rename_map)

    output_columns = [
        'Origin', 'Store', 'Tier', 'Product_Family_KB__c', 'Priority',
        'Average Age', 'Percent Male', 'SLA', 'Daily Revenue', 'Reason',
        'Reg Price', 'Type_of_Support__c', 'Price', 'Quantity', 'Cost',
        'Profit', 'CSAT__c', 'Profit Log', 'Outlier', 'Status',
        'First_Contact_Close__c', 'Time_Open__c', 'Team__c',
        'Owner.External_Id__c', 'close_date_offset',
        'Account.External_Id__c', 'Offer_Voucher__c', 'Send_FieldService__c',
        'IsEscalated'
    ]
    data_gen.write(output_file_name, output_columns)
def run(batch_id, source_file_name, output_file_name, reference_datetime=today, id_offset=0):
    """Generate synthetic Task rows from an opportunity shape file.

    Each source opportunity is fanned out into 1-3 tasks whose created date
    falls between the opportunity's creation and its last activity (never in
    the future relative to ``reference_datetime``).  ``id_offset`` shifts the
    generated external ids so multiple batches do not collide.
    """
    data_gen = DataGenerator()

    # Pull only the columns this generator needs from the opportunity file.
    data_gen.load_source_file(source_file_name, [
        'External_Id__c',
        'Owner.External_Id__c',
        'CreatedDate__c',
        'LastActivityDate__c'
    ])

    # The opportunity id becomes the task's What reference; its last activity
    # date seeds the task's ActivityDate window.
    data_gen.rename_column('External_Id__c', 'What.External_Id__c')
    data_gen.rename_column('LastActivityDate__c', 'ActivityDate')

    # generate a random number of tasks per opportunity
    data_gen.duplicate_rows(duplication_factor=lambda: randint(1, 3))

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Task.' + str(id_offset + data_gen.current_row + 1))

    # Simple per-row attributes, all driven by the shared task generators.
    for column_name, column_formula in [
        ('TaskSubtype', task.oppty_task_subtype),
        ('CallDurationInSeconds', task.task_call_duration),
        ('CallDisposition', task.task_call_disposition),
        ('CallType', task.task_call_type),
        ('Status', task.task_status),
        ('Priority', task.task_priority),
    ]:
        data_gen.add_formula_column(column_name, formula=column_formula)

    def created_date(column_values):
        # Pick a moment between opportunity creation and last activity,
        # clamped so generated data never lands in the future.
        lower = dateutil.parser.parse(column_values['CreatedDate__c'])
        upper = dateutil.parser.parse(column_values['ActivityDate'])
        when = fake.date_time_between_dates(lower, upper)
        if when > reference_datetime:
            when = reference_datetime
        return when.isoformat(sep=' ')
    data_gen.add_formula_column('CreatedDate__c', created_date)

    def activity_date(column_values):
        # Activity happens within two weeks of the task's (new) created date.
        base = dateutil.parser.parse(column_values['CreatedDate__c']).date()
        return (base + timedelta(days=randint(0, 14))).isoformat()
    data_gen.add_formula_column('ActivityDate', activity_date)

    data_gen.add_formula_column('Subject', formula=task.task_subject)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write
    data_gen.apply_transformations()
    data_gen.write(output_file_name)
def run(batch_id, source_file_name, output_file_name):
    """Generate one synthetic Account row per distinct account id in the shape file.

    The account ids and owners come from ``source_file_name``; all other
    attributes (name, address, revenue, industry, ...) are faked.
    """
    data_gen = DataGenerator()

    # Only the account ids are needed from the source rows.
    data_gen.load_source_file(source_file_name, ['Account.External_Id__c'])
    data_gen.rename_column('Account.External_Id__c', 'External_Id__c')

    # Each account should appear exactly once in the output.
    data_gen.unique()

    # Re-read the same file as a dataset so per-account shape values
    # (currently just the owner) can be looked up by account id.
    shape_dataset = data_gen.load_dataset('shape', source_file_name, [
        'Account.External_Id__c',
        'Owner.External_Id__c'
    ])
    shape_account_map = shape_dataset.group_by('Account.External_Id__c')

    def shape_value(column_values, shape_column_name):
        # The first grouped row for this account supplies the shape value.
        return shape_account_map.get(column_values['External_Id__c'])[0].get(shape_column_name)

    # generate name
    data_gen.add_formula_column('Name', formula=account.account_name)

    # generate owner from the shape data
    data_gen.add_formula_column(
        'Owner.External_Id__c',
        lambda column_values: shape_value(column_values, 'Owner.External_Id__c'))

    # generate account source
    data_gen.add_formula_column('AccountSource', formula=account.account_source)

    # generate annual revenue (thousands, normally distributed)
    data_gen.add_formula_column('AnnualRevenue', lambda: 1000 * int(normal(2800, 600)))

    # generate billing address
    data_gen.add_formula_column(
        'BillingStreet',
        formula=lambda: fake.building_number() + ' ' + fake.street_name())
    data_gen.add_formula_column('BillingCity', formula=fake.city)
    data_gen.add_formula_column('BillingState', formula=fake.state_abbr)
    data_gen.add_formula_column('BillingPostalCode', formula=fake.zipcode)
    data_gen.add_constant_column('BillingCountry', 'USA')

    # generate industry
    data_gen.add_formula_column('Industry', formula=account.account_industry)

    # generate number employees (normally distributed)
    data_gen.add_formula_column('NumberOfEmployees', lambda: int(normal(150, 35)))

    # generate ownership
    data_gen.add_formula_column('Ownership', formula=account.account_ownership)

    # generate phone
    data_gen.add_formula_column('Phone', formula=fake.phone_number)

    # generate rating
    data_gen.add_formula_column('Rating', formula=account.account_rating)

    # generate type
    data_gen.add_formula_column('Type', formula=account.account_type)

    # generate year started
    data_gen.add_formula_column('YearStarted', formula=account.account_year_started)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()
    data_gen.write(output_file_name)