def run(batch_id, source_file_name, output_file_name, source_service_resources, delta=timedelta(days=14)):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    data_gen.add_formula_column(
        'Start',
        lambda cv: "" if cv['Start'] == "" else
        (dateutil.parser.parse(cv['Start']) + timedelta(days=delta.days - 1)).replace(tzinfo=None))
    data_gen.add_formula_column(
        'End',
        lambda cv: "" if cv['End'] == "" else
        (dateutil.parser.parse(cv['End']) + timedelta(days=delta.days - 1)).replace(tzinfo=None))

    service_resources = data_gen.load_dataset(
        "ServiceResources", source_service_resources,
        ['Id', 'External_ID__c']).dict('Id', 'External_ID__c')
    data_gen.add_map_column('Resource.External_Id__c', 'ResourceId', service_resources)

    data_gen.apply_transformations()

    data_gen.add_copy_column('CreatedDate__c', 'Start')

    data_gen.apply_transformations()

    data_gen.write(output_file_name, columns=[
        'External_ID__c',
        'Resource.External_Id__c',
        'CreatedDate__c',
        'Start',
        'End',
        'Type',
        #'State',
        #'Country',
        #'City'
    ])
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c',
        'Owner.External_Id__c',
        'CreatedDate__c',
        'EndTime__c',
        'EndedBy__c',
        'Status__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('Owner.External_Id__c', 'Agent.External_Id__c')
    data_gen.add_copy_column('LiveChatTranscript.External_Id__c', 'External_Id__c')
    data_gen.add_copy_column('Time__c', 'CreatedDate__c')
    data_gen.add_constant_column('Type__c', '')
    data_gen.add_constant_column('Detail__c', '')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    type_detail_map = {
        "ChatRequest": "Visitor requested chat.",
        "ChoiceRoute": "Choice chat request routed to all available qualified agents.",
        "CancelNoAgent": "Chat request canceled because no qualifying agents were available.",
        "Accept": "Chat request accepted by agent.",
        "CancelVisitor": "Visitor clicked Cancel Chat.",
        "LeaveAgent": "Agent left chat.",
        "EndAgent": "Agent clicked End Chat.",
        "LeaveVisitor": "Visitor left chat.",
        "EndVisitor": "Visitor clicked End Chat."
    }

    current_count = 1
    new_rows = []
    row_count = len(data_gen.rows)
    for i in range(row_count):
        row = data_gen.rows.pop()
        column_values = data_gen.row_to_column_values(row)

        live_chat = column_values['LiveChatTranscript.External_Id__c']
        agent = column_values['Agent.External_Id__c']
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        end_date = dateutil.parser.parse(column_values['EndTime__c'])
        ended_by = column_values['EndedBy__c']
        status = column_values['Status__c']

        # initialize chat request
        new_column_values = {
            'External_Id__c': 'W_LiveChatTranscriptEvent.' + str(current_count),
            'LiveChatTranscript.External_Id__c': live_chat,
            'Agent.External_Id__c': agent,
            'CreatedDate__c': create_date.isoformat(sep=' '),
            'Time__c': create_date.isoformat(sep=' '),
            'Type__c': 'ChatRequest',
            'Detail__c': 'Visitor requested chat.',
            'analyticsdemo_batch_id__c': batch_id
        }
        current_count += 1
        new_rows.append(data_gen.column_values_to_row(new_column_values))

        if status == 'Missed':
            type__c = choice(['CancelVisitor', 'CancelNoAgent'])
            if type__c == 'CancelNoAgent':
                # no agents
                create_date = fake.date_time_between_dates(create_date, end_date)
                new_column_values = {
                    'External_Id__c': 'W_LiveChatTranscriptEvent.' + str(current_count),
                    'LiveChatTranscript.External_Id__c': live_chat,
                    'Agent.External_Id__c': agent,
                    'CreatedDate__c': create_date.isoformat(sep=' '),
                    'Time__c': create_date.isoformat(sep=' '),
                    'Type__c': 'ChoiceRoute',
                    'Detail__c': 'Choice chat request routed to all available qualified agents.',
                    'analyticsdemo_batch_id__c': batch_id
                }
                current_count += 1
                new_rows.append(data_gen.column_values_to_row(new_column_values))

                create_date = fake.date_time_between_dates(create_date, end_date)
                new_column_values = {
                    'External_Id__c': 'W_LiveChatTranscriptEvent.' + str(current_count),
                    'LiveChatTranscript.External_Id__c': live_chat,
                    'Agent.External_Id__c': agent,
                    'CreatedDate__c': create_date.isoformat(sep=' '),
                    'Time__c': create_date.isoformat(sep=' '),
                    'Type__c': type__c,
                    'Detail__c': type_detail_map[type__c],
                    'analyticsdemo_batch_id__c': batch_id
                }
                current_count += 1
                new_rows.append(data_gen.column_values_to_row(new_column_values))

                type__c = choice(['LeaveVisitor', 'EndVisitor'])
                new_column_values = {
                    'External_Id__c': 'W_LiveChatTranscriptEvent.' + str(current_count),
                    'LiveChatTranscript.External_Id__c': live_chat,
                    'Agent.External_Id__c': agent,
                    'CreatedDate__c': end_date.isoformat(sep=' '),
                    'Time__c': end_date.isoformat(sep=' '),
                    'Type__c': type__c,
                    'Detail__c': type_detail_map[type__c],
                    'analyticsdemo_batch_id__c': batch_id
                }
                current_count += 1
                new_rows.append(data_gen.column_values_to_row(new_column_values))
            else:
                # visitor canceled
                new_column_values = {
                    'External_Id__c': 'W_LiveChatTranscriptEvent.' + str(current_count),
                    'LiveChatTranscript.External_Id__c': live_chat,
                    'Agent.External_Id__c': agent,
                    'CreatedDate__c': end_date.isoformat(sep=' '),
                    'Time__c': end_date.isoformat(sep=' '),
                    'Type__c': type__c,
                    'Detail__c': type_detail_map[type__c],
                    'analyticsdemo_batch_id__c': batch_id
                }
                current_count += 1
                new_rows.append(data_gen.column_values_to_row(new_column_values))
        else:
            type__c = 'ChoiceRoute'
            new_column_values = {
                'External_Id__c': 'W_LiveChatTranscriptEvent.' + str(current_count),
                'LiveChatTranscript.External_Id__c': live_chat,
                'Agent.External_Id__c': agent,
                'CreatedDate__c': create_date.isoformat(sep=' '),
                'Time__c': create_date.isoformat(sep=' '),
                'Type__c': type__c,
                'Detail__c': type_detail_map[type__c],
                'analyticsdemo_batch_id__c': batch_id
            }
            current_count += 1
            new_rows.append(data_gen.column_values_to_row(new_column_values))

            type__c = 'Accept'
            create_date = fake.date_time_between_dates(create_date, end_date)
            new_column_values = {
                'External_Id__c': 'W_LiveChatTranscriptEvent.' + str(current_count),
                'LiveChatTranscript.External_Id__c': live_chat,
                'Agent.External_Id__c': agent,
                'CreatedDate__c': create_date.isoformat(sep=' '),
                'Time__c': create_date.isoformat(sep=' '),
                'Type__c': type__c,
                'Detail__c': type_detail_map[type__c],
                'analyticsdemo_batch_id__c': batch_id
            }
            current_count += 1
            new_rows.append(data_gen.column_values_to_row(new_column_values))

            if ended_by == 'Visitor':
                type__c = choice(['LeaveVisitor', 'EndVisitor'])
                new_column_values = {
                    'External_Id__c': 'W_LiveChatTranscriptEvent.' + str(current_count),
                    'LiveChatTranscript.External_Id__c': live_chat,
                    'Agent.External_Id__c': agent,
                    'CreatedDate__c': end_date.isoformat(sep=' '),
                    'Time__c': end_date.isoformat(sep=' '),
                    'Type__c': type__c,
                    'Detail__c': type_detail_map[type__c],
                    'analyticsdemo_batch_id__c': batch_id
                }
                current_count += 1
                new_rows.append(data_gen.column_values_to_row(new_column_values))
            else:
                type__c = choice(['LeaveAgent', 'EndAgent'])
                new_column_values = {
                    'External_Id__c': 'W_LiveChatTranscriptEvent.' + str(current_count),
                    'LiveChatTranscript.External_Id__c': live_chat,
                    'Agent.External_Id__c': agent,
                    'CreatedDate__c': end_date.isoformat(sep=' '),
                    'Time__c': end_date.isoformat(sep=' '),
                    'Type__c': type__c,
                    'Detail__c': type_detail_map[type__c],
                    'analyticsdemo_batch_id__c': batch_id
                }
                current_count += 1
                new_rows.append(data_gen.column_values_to_row(new_column_values))

    data_gen.rows = new_rows

    # apply transformations and write file
    output_columns = [
        'External_Id__c',
        'LiveChatTranscript.External_Id__c',
        'Agent.External_Id__c',
        'Type__c',
        'Detail__c',
        'CreatedDate__c',
        'Time__c',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def run(batch_id, source_file_name, output_file_name, reference_date=today_datetime):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'StageName', 'Amount', 'ForecastCategory',
        'CloseDate', 'CreatedDate__c', 'SalesStageCount__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_columns({
        'StageName': 'StageName__c',
        'Amount': 'Amount__c',
        'ForecastCategory': 'ForecastCategory__c',
        'CloseDate': 'CloseDate__c'
    })
    data_gen.add_copy_column('Opportunity.External_Id__c', 'External_Id__c')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    stages = ['Qualification', 'Discovery', 'Proposal/Quote', 'Negotiation']
    forecast_categories = ['BestCase', 'Pipeline', 'Commit']

    pipe_bucket = [
        'No Change', 'Reopen', 'Expand', 'Reduce', 'Moved Out', 'Moved In', 'Stage Change'
    ]
    pipe_bucket_ratio = [0.10, 0.05, 0.15, 0.15, 0.30, 0.10, 0.15]

    qualification_pipe_bucket = [
        'No Change', 'Reopen', 'Expand', 'Reduce', 'Moved Out', 'Moved In'
    ]
    qualification_pipe_bucket_ratio = [0.20, 0.05, 0.20, 0.10, 0.35, 0.10]

    zero_amount_pipe_bucket = [
        'No Change', 'Reopen', 'Moved Out', 'Moved In', 'Stage Change'
    ]
    zero_amount_pipe_bucket_ratio = [0.20, 0.05, 0.35, 0.10, 0.30]

    current_count = 1
    new_rows = []
    row_count = len(data_gen.rows)
    for i in range(row_count):
        row = data_gen.rows.pop()
        column_values = data_gen.row_to_column_values(row)

        opportunity_id = column_values['Opportunity.External_Id__c']
        close_date = dateutil.parser.parse(column_values['CloseDate__c'])
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        final_amount = int(column_values['Amount__c'])
        final_forecast_category = column_values['ForecastCategory__c']
        final_stage_name = column_values['StageName__c']
        stage_count = int(column_values['SalesStageCount__c'])

        # initialize most recent event date to reference_date or earlier
        event_date_range_start = create_date + (close_date - create_date) / 2
        event_date_range_end = close_date
        if close_date > reference_date:
            event_date_range_end = reference_date
            event_date_range_start = create_date + (reference_date - create_date) / 2

        # ensure event happens on or after opportunity create_date
        event_date = fake.date_time_between_dates(event_date_range_start, event_date_range_end)

        # create final state
        column_values['CreatedDate__c'] = event_date
        column_values['External_Id__c'] = 'W_OpportunityHistory.' + str(current_count)
        current_count += 1
        new_rows.append(data_gen.column_values_to_row(column_values))

        next_create_date = event_date
        next_stage_name = final_stage_name
        next_forecast_category = final_forecast_category
        next_close_date = close_date
        next_amount = final_amount

        movedOut = False
        movedIn = False
        expand = False
        reduce = False
        reopen = False
        initialized = False

        # generate events in reverse order until create_date
        for current_stage_count in range(stage_count):
            # choose the proper bucket depending on the scenario
            bucket = pipe_bucket
            ratio = pipe_bucket_ratio
            if next_amount <= 0:
                bucket = zero_amount_pipe_bucket
                ratio = zero_amount_pipe_bucket_ratio
            elif next_stage_name == 'Qualification':
                bucket = qualification_pipe_bucket
                ratio = qualification_pipe_bucket_ratio
            event = choice(bucket, p=ratio)

            event_date_range_end = event_date
            event_date_range_start = create_date + (event_date - create_date) / 2
            event_date = fake.date_time_between_dates(event_date_range_start, event_date_range_end)

            # if next stage is closed, make the previous event a stage change
            if 'Closed' in next_stage_name:
                event = 'Stage Change'

            # if the event date is the create date, create the initial state
            if current_stage_count == stage_count - 1:
                event_date = create_date
                event = 'Initial State'

            if event != 'No Change':
                curr_close_date = next_close_date
                curr_amount = next_amount
                curr_stage_name = next_stage_name
                curr_forecast_category = next_forecast_category

                if event == 'Reopen' and not reopen:
                    curr_stage_name = 'Closed Lost'
                    curr_forecast_category = 'Omitted'
                    reopen = True
                elif event == 'Initial State':
                    curr_stage_name = 'Qualification'
                    curr_forecast_category = 'Pipeline'
                    initialized = True
                elif event == 'Expand' and not expand:
                    curr_amount = next_amount - int(uniform(.15, .45) * final_amount)
                    if curr_amount <= 0:
                        # reduce instead
                        curr_amount = next_amount + int(uniform(.15, .45) * final_amount)
                    expand = True
                elif event == 'Reduce' and not reduce:
                    curr_amount = next_amount + int(uniform(.15, .45) * final_amount)
                    reduce = True
                elif event == 'Moved In' and not movedIn:
                    curr_close_date = curr_close_date + timedelta(days=randint(0, 30))
                    movedIn = True
                elif event == 'Moved Out' and not movedOut:
                    curr_close_date = curr_close_date - timedelta(days=randint(30, 90))
                    movedOut = True
                elif event == 'Stage Change':
                    # if next stage is not closed, use previous stage
                    if 'Closed' not in next_stage_name and stages.index(next_stage_name) - 1 > 0:
                        curr_stage_name = stages[stages.index(next_stage_name) - 1]
                    # if next stage is closed, use any stage
                    elif 'Closed' in next_stage_name:
                        curr_stage_name = stages[randint(1, len(stages) - 1)]
                    else:
                        curr_stage_name = stages[0]
                    curr_forecast_category = forecast_categories[randint(0, len(forecast_categories) - 1)]

                new_column_values = {
                    'External_Id__c': 'W_OpportunityHistory.' + str(current_count),
                    'Opportunity.External_Id__c': opportunity_id,
                    'StageName__c': curr_stage_name,
                    'Amount__c': curr_amount,
                    'ForecastCategory__c': curr_forecast_category,
                    'CreatedDate__c': event_date.isoformat(sep=' '),
                    'CloseDate__c': curr_close_date.date().isoformat(),
                    'analyticsdemo_batch_id__c': batch_id
                }
                current_count += 1
                new_rows.append(data_gen.column_values_to_row(new_column_values))

                next_stage_name = curr_stage_name
                next_forecast_category = curr_forecast_category
                next_close_date = curr_close_date
                next_amount = curr_amount

    data_gen.rows = new_rows
    data_gen.reverse()

    data_gen.write(output_file_name, [
        'External_Id__c', 'Amount__c', 'StageName__c', 'ForecastCategory__c',
        'CloseDate__c', 'CreatedDate__c', 'Opportunity.External_Id__c',
        'analyticsdemo_batch_id__c'
    ])
def run(batch_id, source_file_name, output_file_name, reference_datetime=today):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    data_gen.rename_column('Owner.External_Id__c', 'User.External_Id__c')

    data_gen.duplicate_rows(duplication_factor=lambda: choice(
        [1, 2, 3, 4, 5], p=[.65, .15, .10, .05, .05]))

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_AgentWork.' + str(data_gen.current_row + 1))

    data_gen.add_copy_column('RequestDateTime__c', 'CreatedDate__c')

    def created_date_formula(column_values):
        created_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        closed_date = dateutil.parser.parse(column_values['ClosedDate__c'])
        if closed_date > reference_datetime:
            closed_date = reference_datetime
        mid_date = created_date + (closed_date - created_date) / 2
        return fake.date_time_between_dates(created_date, mid_date).isoformat(sep=' ')
    data_gen.add_formula_column('CreatedDate__c', created_date_formula)

    def assigned_date_formula(column_values):
        created_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        return (created_date + timedelta(seconds=randint(0, 120))).isoformat(sep=' ')
    data_gen.add_formula_column('AssignedDateTime__c', assigned_date_formula)

    def accept_date_formula(column_values):
        assigned_date = dateutil.parser.parse(column_values['AssignedDateTime__c'])
        return (assigned_date + timedelta(seconds=randint(30, 600))).isoformat(sep=' ')
    data_gen.add_formula_column('AcceptDateTime__c', accept_date_formula)

    def close_date_formula(column_values):
        accept_date = dateutil.parser.parse(column_values['AcceptDateTime__c'])
        return (accept_date + timedelta(seconds=randint(30, 1800))).isoformat(sep=' ')
    data_gen.add_formula_column('CloseDateTime__c', close_date_formula)

    def active_time_formula(column_values):
        accept_date = dateutil.parser.parse(column_values['AcceptDateTime__c'])
        close_date = dateutil.parser.parse(column_values['CloseDateTime__c'])
        return int((close_date - accept_date).total_seconds())
    data_gen.add_formula_column('ActiveTime__c', active_time_formula)

    data_gen.add_formula_column('AgentCapacityWhenDeclined__c', lambda: randint(30, 1800))

    def cancel_date_formula(column_values):
        assigned_date = dateutil.parser.parse(column_values['AssignedDateTime__c'])
        return (assigned_date + timedelta(seconds=randint(30, 600))).isoformat(sep=' ')
    data_gen.add_formula_column('CancelDateTime__c', cancel_date_formula)

    data_gen.add_formula_column('CapacityPercentage__c', lambda: randint(1, 101))
    data_gen.add_formula_column('CapacityWeight__c', lambda: randint(1, 7))

    def decline_date_formula(column_values):
        assigned_date = dateutil.parser.parse(column_values['AssignedDateTime__c'])
        return (assigned_date + timedelta(seconds=randint(30, 600))).isoformat(sep=' ')
    data_gen.add_formula_column('DeclineDateTime__c', decline_date_formula)

    data_gen.add_formula_column('DeclineReason__c', formula=fake.sentence)
    data_gen.add_copy_column('HandleTime__c', 'ActiveTime__c')

    data_gen.add_formula_column('OriginalQueue.DeveloperName', [
        'GeneralQueue', 'InternationalQueue', 'Knowledge_Translations',
        'Social_Queue', 'TargetCampaign', 'Tier1Queue', 'Tier2Queue', 'Tier3Queue'
    ])

    data_gen.add_formula_column('PushTimeout__c', lambda: randint(0, 100))

    def push_timeout_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        return create_date + timedelta(seconds=column_values['PushTimeout__c'])
    data_gen.add_formula_column('PushTimeoutDateTime__c', push_timeout_date_formula)

    data_gen.add_formula_column(
        'ServiceChannel.DeveloperName',
        ['Cases', 'LiveMessage', 'sfdc_liveagent', 'Leads'])

    def speed_to_answer_formula(column_values):
        request_date = dateutil.parser.parse(column_values['RequestDateTime__c'])
        accept_date = dateutil.parser.parse(column_values['AcceptDateTime__c'])
        return int((accept_date - request_date).total_seconds())
    data_gen.add_formula_column('SpeedToAnswer__c', speed_to_answer_formula)

    data_gen.add_formula_column('Status__c', [
        'Assigned', 'Unavailable', 'Declined', 'Opened', 'Closed',
        'DeclinedOnPushTimeout', 'Canceled'
    ])

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    def filter_func(column_values):
        created_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        cutoff_date = reference_datetime - timedelta(days=60)
        return column_values['Origin'] == 'Chat' and created_date >= cutoff_date
    data_gen.filter(filter_function=filter_func)

    data_gen.apply_transformations()

    data_gen.sort_by('RequestDateTime__c')

    output_columns = [
        'External_Id__c', 'RequestDateTime__c', 'CreatedDate__c',
        'AssignedDateTime__c', 'AcceptDateTime__c', 'CloseDateTime__c',
        'ActiveTime__c', 'AgentCapacityWhenDeclined__c', 'CancelDateTime__c',
        'CapacityPercentage__c', 'CapacityWeight__c', 'DeclineDateTime__c',
        'DeclineReason__c', 'HandleTime__c', 'OriginalQueue.DeveloperName',
        'PushTimeout__c', 'PushTimeoutDateTime__c', 'ServiceChannel.DeveloperName',
        'SpeedToAnswer__c', 'Status__c', 'User.External_Id__c',
        'Case.External_Id__c', 'analyticsdemo_batch_id__c'
    ]
    return data_gen.write(output_file_name, output_columns, 6000)
def run(batch_id, source_file_name, output_file_name, reference_datetime=today):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c', 'LastActivityDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')

    data_gen.duplicate_rows(duplication_factor=lambda: randint(0, 3))

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Services_Event.' + str(data_gen.current_row + 1))

    data_gen.add_formula_column('Subject', formula=event.event_subject)
    data_gen.add_formula_column('EventSubtype', formula=event.event_subtype)
    data_gen.add_formula_column('DurationInMinutes', formula=event.event_call_duration)

    def create_date_formula(column_values):
        case_create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        case_close_date = datetime.combine(
            dateutil.parser.parse(column_values['LastActivityDate__c']),
            case_create_date.time())
        if case_close_date > reference_datetime:
            case_close_date = reference_datetime
        create_date = fake.date_time_between_dates(case_create_date, case_close_date)
        if create_date > reference_datetime:
            create_date = reference_datetime
        return create_date.isoformat(sep=' ')
    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    data_gen.add_copy_column('LastModifiedDate__c', 'CreatedDate__c')

    def activity_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c']).date()
        return (create_date + timedelta(days=randint(0, 14))).isoformat()
    data_gen.add_formula_column('ActivityDate', activity_date_formula)

    def activity_datetime_formula(column_values):
        return dateutil.parser.parse(column_values['ActivityDate'])
    data_gen.add_formula_column('ActivityDateTime', activity_datetime_formula)

    data_gen.add_constant_column('ShowAs', 'Busy')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'Case.External_Id__c',
        'Subject', 'EventSubtype', 'DurationInMinutes', 'ShowAs',
        'CreatedDate__c', 'LastModifiedDate__c', 'ActivityDate',
        'ActivityDateTime', 'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def run(batch_id, source_file_name, output_file_name, reference_datetime=today):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c', 'ClosedDate__c', 'Origin'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    data_gen.rename_column('ClosedDate__c', 'EndTime__c')

    data_gen.duplicate_rows(duplication_factor=lambda: choice(
        [1, 2, 3, 4, 5], p=[.65, .15, .10, .05, .05]))

    data_gen.add_formula_column(
        'External_Id__c',
        lambda: 'W_LiveChatTranscript.' + str(data_gen.current_row + 1))

    data_gen.add_formula_column('Abandoned__c', lambda: randint(1, 300))
    data_gen.add_formula_column('AverageResponseTimeOperator__c', lambda: randint(1, 180))
    data_gen.add_formula_column('AverageResponseTimeVisitor__c', lambda: randint(1, 180))
    data_gen.add_formula_column('Body__c', formula=fake.body)
    data_gen.add_formula_column('Browser__c', formula=fake.browser)
    data_gen.add_constant_column('BrowserLanguage__c', 'en_US')
    data_gen.add_formula_column('ChatDuration__c', lambda: randint(1, 600))
    data_gen.add_formula_column('ChatKey__c', formula=fake.md5)
    data_gen.add_formula_column('IpAddress__c', formula=fake.ipv4)
    data_gen.add_formula_column('LiveChatButton.DeveloperName', ['Public_Website_Chat_Button'])
    data_gen.add_formula_column('Location__c', formula=fake.city)
    data_gen.add_formula_column('MaxResponseTimeOperator__c', lambda: randint(1, 120))
    data_gen.add_formula_column('MaxResponseTimeVisitor__c', lambda: randint(1, 240))
    data_gen.add_formula_column('Name__c', lambda: str(data_gen.current_row + 1).zfill(8))
    data_gen.add_formula_column('OperatorMessageCount__c', lambda: randint(1, 100))
    data_gen.add_formula_column('Platform__c', ['MacOSX', 'iOS', 'Android', 'Windows', 'Unix'])

    referrer = [
        "https://na17.salesforce.com/setup/forcecomHomepage.apexp?setupid=ForceCom&retURL=%2Fui%2Fsupport%2Fservicedesk%2FServiceDeskPage",
        "https://na13.salesforce.com/home/home.jsp",
        "https://sdodemo-main.force.com/partners/servlet/servlet.Integration?lid=01ra0000001VlbA&ic=1",
        "https://sitestudio.na17.force.com/?exitURL=%2F_ui%2Fnetworks%2Fsetup%2FSetupNetworksPage%2Fd",
        "https://mail.google.com/mail/u/0/",
        "https://sdodemo-main.force.com/customers/servlet/servlet.Integration?lid=01ra0000001VlbP&ic=1",
        "https://sdodemo-main.force.com/consumers/servlet/servlet.Integration?lid=01ro0000000EN78&ic=1",
        "https://na17.salesforce.com/servlet/servlet.su?oid=00D300000007EfQ&retURL=%2F0033000000PuxU2&sunetworkuserid=005a000000AuCha&sunetworkid=0DBo0000000Gn4h",
        "https://sdodemo-main.force.com/customers/servlet/servlet.Integration?ic=1&lid=01ra0000001VlbP"
    ]
    data_gen.add_formula_column('ReferrerUri__c', referrer)

    def create_date_formula(column_values):
        case_create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        case_close_date = dateutil.parser.parse(column_values['EndTime__c'])
        create_date = fake.date_time_between_dates(case_create_date, case_close_date)
        if create_date > reference_datetime:
            create_date = reference_datetime
        return create_date.isoformat(sep=' ')
    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    def start_time_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        start_time = create_date + timedelta(seconds=randint(1, 300))
        return start_time.isoformat(sep=' ')
    data_gen.add_formula_column('StartTime__c', start_time_formula)

    def end_time_formula(column_values):
        create_date = dateutil.parser.parse(column_values['StartTime__c'])
        end_time = create_date + timedelta(seconds=randint(1, 600))
        return end_time.isoformat(sep=' ')
    data_gen.add_formula_column('EndTime__c', end_time_formula)

    data_gen.add_copy_column('RequestTime__c', 'CreatedDate__c')

    data_gen.add_formula_column(
        'Status__c', lambda: choice(['Missed', 'Completed'], p=[.20, .80]))

    data_gen.add_map_column('EndedBy__c', 'Status__c', {
        'Completed': ['Visitor', 'Agent'],
        None: 'Visitor'
    })

    data_gen.add_constant_column('SupervisorTranscriptBody__c', '')
    data_gen.add_constant_column('ScreenResolution__c', '')
    data_gen.add_formula_column('UserAgent__c', formula=fake.user_agent)
    data_gen.add_formula_column('VisitorMessageCount__c', lambda: randint(1, 50))
    data_gen.add_formula_column('WaitTime__c', lambda: randint(1, 120))

    def last_referenced_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        last_referenced_date = create_date + timedelta(seconds=randint(1, 300))
        return last_referenced_date.isoformat(sep=' ')
    data_gen.add_formula_column('LastReferencedDate__c', last_referenced_date_formula)

    data_gen.add_copy_column('LastViewedDate__c', 'LastReferencedDate__c')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    def filter_func(column_values):
        return column_values['Origin'] == 'Chat'
    data_gen.filter(filter_function=filter_func)

    # apply transformations and write file
    data_gen.apply_transformations()

    data_gen.sort_by('StartTime__c')

    output_columns = [
        'External_Id__c', 'Abandoned__c', 'AverageResponseTimeOperator__c',
        'MaxResponseTimeOperator__c', 'OperatorMessageCount__c', 'Body__c',
        'Browser__c', 'BrowserLanguage__c', 'Case.External_Id__c',
        'ChatDuration__c', 'ChatKey__c', 'CreatedDate__c', 'StartTime__c',
        'EndTime__c', 'EndedBy__c', 'LastReferencedDate__c', 'LastViewedDate__c',
        'LiveChatButton.DeveloperName', 'Location__c', 'Owner.External_Id__c',
        'Platform__c', 'ReferrerUri__c', 'ScreenResolution__c', 'RequestTime__c',
        'Status__c', 'SupervisorTranscriptBody__c', 'UserAgent__c',
        'AverageResponseTimeVisitor__c', 'IpAddress__c',
        'MaxResponseTimeVisitor__c', 'VisitorMessageCount__c', 'WaitTime__c',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def run(batch_id, source_file_name, output_file_name, reference_date=today_datetime, filter_function=None):
    def get_close_date(values):
        return dateutil.parser.parse(values['CloseDate'])

    def get_create_date(values):
        return dateutil.parser.parse(values['CreatedDate__c'])

    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # add an age column
    data_gen.add_copy_column('Age__c', 'TimeToClose__c')

    # generate a close date
    def close_date_formula(column_values):
        last_day = date(date.today().year, 12, 31)
        offset = column_values['close_date_offset__c']
        # last day of current year - offset
        close_date = last_day - timedelta(days=int(offset))
        return str(close_date)
    data_gen.add_formula_column('CloseDate', close_date_formula)

    # generate a create date
    def create_date_formula(column_values):
        close_date = dateutil.parser.parse(column_values['CloseDate'])
        offset = column_values['TimeToClose__c']
        create_date = close_date - timedelta(days=int(offset))
        return create_date.isoformat(sep=' ')
    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    # generate last activity date
    def last_activity_date_formula(column_values):
        create_date = get_create_date(column_values)
        close_date = get_close_date(column_values)
        if close_date > reference_date:
            close_date = reference_date
        if create_date > reference_date:
            create_date = reference_date
        return fake.date_time_between_dates(create_date, close_date).date()
    data_gen.add_formula_column('LastActivityDate__c', formula=last_activity_date_formula)

    data_gen.apply_transformations()

    if filter_function:
        data_gen.filter(filter_function)

    new_rows = []
    row_count = len(data_gen.rows)
    for i in range(row_count):
        row = data_gen.rows.pop()
        column_values = data_gen.row_to_column_values(row)

        close_day = get_close_date(column_values)
        create_day = get_create_date(column_values)

        # if close date is before reference date keep it exactly as is
        if close_day <= reference_date:
            new_rows.append(row)
        # if create date is before reference date, but the close date is after reference date
        elif (create_day <= reference_date) and (close_day > reference_date):
            # set age
            age = (reference_date - create_day).days
            column_values['Age__c'] = age

            ttc = float(column_values['TimeToClose__c'])
            pct = age / ttc

            # set IsClosed to blank
            column_values['IsClosed'] = ''
            # set IsWon to blank
            column_values['IsWon'] = ''

            # set a stage name
            stage_name_index = int(floor(pct * 4) + choice([-1, 0, 1], p=[.2, .7, .1]))
            # adjust the stage name index
            if stage_name_index < 0:
                stage_name_index = 0
            if stage_name_index > 3:
                stage_name_index = 3

            column_values['StageName'] = definitions.stage_name[stage_name_index]
            column_values['Probability'] = definitions.probabilities[stage_name_index]
            column_values['ForecastCategory'] = definitions.forecast_category[
                choice([1, 2, 4], p=[.625, .25, .125])]
            column_values['ForecastCategoryName'] = definitions.forecast_category_name[
                column_values['ForecastCategory']]
            column_values['SalesStageCount__c'] = ceil(pct * float(column_values['SalesStageCount__c']))

            new_rows.append(data_gen.column_values_to_row(column_values))

    data_gen.rows = new_rows
    data_gen.reverse()

    data_gen.write(output_file_name)
def run(batch_id, source_file_name, output_file_name, shape_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['External_Id__c', 'Owner.External_Id__c']
    data_gen.load_source_file(source_file_name, source_columns)
    data_gen.unique()

    # rename columns
    data_gen.rename_column('External_Id__c', 'Account.External_Id__c')

    data_gen.add_formula_column(
        'External_Id__c',
        lambda: 'W_Services_Opportunity.' + str(data_gen.current_row + 1))

    stages = [
        'Qualification', 'Needs Analysis', 'Proposal/Quote', 'Negotiation',
        'Closed Won', 'Closed Lost'
    ]
    data_gen.add_formula_column(
        'StageName', lambda: choice(stages, p=[.25, .20, .15, .10, .15, .15]))

    types = ['New Business', 'Add-On Business', 'Services', 'Renewal']
    data_gen.add_formula_column('Type', lambda: choice(types, p=[.45, .27, .18, .10]))

    products = ["GC20002", "GC5000 series", "GC10001", "GC50000", "GC1000 series"]
    data_gen.add_formula_column('Products__c', products)

    data_gen.add_formula_column('Amount', lambda: 1000 * int(normal(1400, 350)))

    data_gen.add_formula_column(
        'Name', lambda cv: 'New Opportunity [' + str(data_gen.current_row + 1) + ']')

    # load shape data as dataset
    shape_columns = ['Account.External_Id__c', 'CreatedDate__c', 'LastActivityDate__c']
    shape_dataset = data_gen.load_dataset('shape', shape_file_name, shape_columns)

    # build map of account values
    shape_account_map = shape_dataset.group_by('Account.External_Id__c')

    # generate earliest created date
    def create_date_formula(column_values):
        accounts = shape_account_map.get(column_values['Account.External_Id__c'])
        create_dates = [
            dateutil.parser.parse(account['CreatedDate__c']) for account in accounts
        ]
        create_dates.sort()
        return (create_dates[0] - timedelta(days=randint(1, 45))).isoformat(sep=' ')
    data_gen.add_formula_column('DateTimeCreated__c', create_date_formula)

    # generate last activity date
    def last_activity_date_formula(column_values):
        accounts = shape_account_map.get(column_values['Account.External_Id__c'])
        activity_dates = [
            dateutil.parser.parse(account['LastActivityDate__c']) for account in accounts
        ]
        activity_dates.sort(reverse=True)
        return activity_dates[0].isoformat(sep=' ')
    data_gen.add_formula_column('LastActivityDate__c', last_activity_date_formula)

    data_gen.add_copy_column('CloseDate', 'DateTimeCreated__c')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'Account.External_Id__c',
        'DateTimeCreated__c', 'CloseDate', 'LastActivityDate__c', 'Name',
        'Products__c', 'StageName', 'Amount', 'Type', 'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def run(batch_id, source_file_name, output_file_name, reference_date=today_datetime):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name, ['External_Id__c'])

    data_gen.rename_column('External_Id__c', 'User.External_Id__c')
    data_gen.add_copy_column('Owner.External_Id__c', 'User.External_Id__c')

    data_gen.duplicate_rows(duplication_factor=lambda: int(normal(60, 10)))

    data_gen.add_formula_column(
        'External_Id__c',
        lambda: 'W_UserServicePresence.' + str(data_gen.current_row + 1))

    data_gen.add_formula_column('AtCapacityDuration__c', lambda: randint(30, 900))
    data_gen.add_formula_column('AverageCapacity__c', lambda: randint(30, 500))
    data_gen.add_formula_column('ConfiguredCapacity__c', lambda: randint(30, 600))

    start_date = reference_date - timedelta(days=365)
    end_date = reference_date
    data_gen.add_formula_column(
        'CreatedDate__c',
        lambda: fake.date_time_between_dates(start_date, end_date).isoformat(sep=' '))

    data_gen.add_formula_column('IdleDuration__c', lambda: randint(30, 600))
    data_gen.add_formula_column('IsCurrentState__c', lambda: choice(['true', 'false']))
    data_gen.add_formula_column('IsAway__c', lambda: choice(['true', 'false']))
    data_gen.add_formula_column('StatusDuration__c', lambda: randint(30, 900))
    data_gen.add_copy_column('StatusStartDate__c', 'CreatedDate__c')

    def status_end_date_formula(column_values):
        start_date = dateutil.parser.parse(column_values['StatusStartDate__c'])
        status_duration = int(column_values['StatusDuration__c'])
        return (start_date + timedelta(seconds=status_duration)).isoformat(sep=' ')
    data_gen.add_formula_column('StatusEndDate__c', formula=status_end_date_formula)

    data_gen.add_formula_column('ServicePresenceStatus.DeveloperName', [
        'Busy', 'Online', 'Available_Live_Agent', 'Busy_Break', 'Busy_Lunch',
        'Busy_Training', 'Available_LiveMessage'
    ])

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c', 'User.External_Id__c', 'Owner.External_Id__c',
        'AtCapacityDuration__c', 'AverageCapacity__c', 'ConfiguredCapacity__c',
        'CreatedDate__c', 'IdleDuration__c', 'IsAway__c', 'IsCurrentState__c',
        'StatusDuration__c', 'StatusStartDate__c', 'StatusEndDate__c',
        'ServicePresenceStatus.DeveloperName', 'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def run(batch_id, source_file_name, output_file_name, source_accounts, source_service_resources,
        source_service_territories, source_work_orders, reference_datetime=today):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    data_gen.add_formula_column(
        'EarliestStartTime', lambda cv: dateutil.parser.parse(cv['EarliestStartTime']))

    data_gen.apply_transformations()

    data_gen.sort_by('EarliestStartTime', reverse=True)

    # shift dates to be 2 weeks prior to the reference date
    delta = reference_datetime.date() - data_gen.row_to_column_values(
        data_gen.rows[0])['EarliestStartTime'].date()

    data_gen.add_formula_column(
        'EarliestStartTime',
        lambda cv: (cv['EarliestStartTime'] + timedelta(days=delta.days - 1)).replace(tzinfo=None))
    data_gen.add_formula_column(
        'ActualStartTime',
        lambda cv: "" if cv['ActualStartTime'] == "" else
        (dateutil.parser.parse(cv['ActualStartTime']) + timedelta(days=delta.days - 1)).replace(tzinfo=None))
    data_gen.add_formula_column(
        'ActualEndTime',
        lambda cv: "" if cv['ActualEndTime'] == "" else
        (dateutil.parser.parse(cv['ActualEndTime']) + timedelta(days=delta.days - 1)).replace(tzinfo=None))
    data_gen.add_formula_column(
        'ArrivalWindowStartTime',
        lambda cv: "" if cv['ArrivalWindowStartTime'] == "" else
        (dateutil.parser.parse(cv['ArrivalWindowStartTime']) + timedelta(days=delta.days - 1)).replace(tzinfo=None))
    data_gen.add_formula_column(
        'ArrivalWindowEndTime',
        lambda cv: "" if cv['ArrivalWindowEndTime'] == "" else
        (dateutil.parser.parse(cv['ArrivalWindowEndTime']) + timedelta(days=delta.days - 1)).replace(tzinfo=None))
    data_gen.add_formula_column(
        'DueDate',
        lambda cv: "" if cv['DueDate'] == "" else
        (dateutil.parser.parse(cv['DueDate']) + timedelta(days=delta.days - 1)).replace(tzinfo=None))

    data_gen.apply_transformations()

    data_gen.add_copy_column('CreatedDate__c', 'EarliestStartTime')

    accounts = data_gen.load_dataset(
        "Accounts", source_accounts, ['Id', 'External_ID__c']).dict('Id', 'External_ID__c')
    data_gen.add_map_column('Account.External_Id__c', 'AccountId', accounts)

    service_resources = data_gen.load_dataset(
        "ServiceResources", source_service_resources,
        ['Id', 'External_ID__c']).dict('Id', 'External_ID__c')
    data_gen.add_map_column('ServiceResource.External_Id__c',
                            'FSLDemoTools_Service_Resource__c', service_resources)

    service_territories = data_gen.load_dataset(
        "ServiceTerritories", source_service_territories,
        ['Id', 'External_ID__c']).dict('Id', 'External_ID__c')
    data_gen.add_map_column('ServiceTerritory.External_Id__c',
                            'ServiceTerritoryId', service_territories)

    work_orders = data_gen.load_dataset(
        "WorkOrders", source_work_orders,
        ['Id', 'External_ID__c']).dict('Id', 'External_ID__c')
    data_gen.add_map_column('WorkOrder.External_Id__c', 'ParentRecordId', work_orders)

    data_gen.apply_transformations()

    data_gen.filter(lambda cv: cv['WorkOrder.External_Id__c'].startswith('WO.'))
    data_gen.apply_transformations()

    data_gen.write(output_file_name, columns=[
        'External_ID__c', 'CreatedDate__c', 'ServiceResource.External_Id__c',
        'ServiceTerritory.External_Id__c', 'WorkOrder.External_Id__c',
        'ActualStartTime', 'ArrivalWindowStartTime', 'ActualDuration',
        'EarliestStartTime', 'Duration', 'DurationType', 'Status', 'DueDate',
        'ActualEndTime', 'ArrivalWindowEndTime'
    ])

    return delta
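The appointment generator above returns the `delta` it used to shift dates, and the resource-absence generator at the top of this section accepts a `delta` keyword for the same purpose. A minimal driver sketch follows; the module aliases (`service_appointments`, `resource_absences`), file names, and batch id are illustrative assumptions, not values from the source.

# Hypothetical wiring of the two generators. Only the run() signatures above are
# real; module names, file paths, and the batch id are placeholder assumptions.
import service_appointments   # assumed module holding the appointment run() above
import resource_absences      # assumed module holding the delta-accepting run() at the top

batch_id = 'demo-batch-001'   # placeholder batch id
delta = service_appointments.run(
    batch_id, 'ServiceAppointment.csv', 'ServiceAppointment.out.csv',
    'Accounts.csv', 'ServiceResources.csv', 'ServiceTerritories.csv', 'WorkOrders.csv')
# reuse the returned shift so absences line up with the shifted appointments
resource_absences.run(
    batch_id, 'ResourceAbsence.csv', 'ResourceAbsence.out.csv',
    'ServiceResources.csv', delta=delta)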
def run(batch_id, source_file_name, output_file_name, reference_datetime=today):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c',
        'LastActivityDate__c', 'Team__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    data_gen.rename_column('LastActivityDate__c', 'ActivityDate')
    data_gen.rename_column('Team__c', 'CallObject')

    # generate a random number of tasks per case
    data_gen.duplicate_rows(duplication_factor=lambda: randint(0, 3))

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Services_Task.' + str(data_gen.current_row + 1))

    data_gen.add_formula_column('TaskSubtype', formula=task.task_subtype)
    data_gen.add_formula_column('CallDurationInSeconds', formula=task.task_call_duration)
    data_gen.add_formula_column('CallDisposition', formula=task.task_call_disposition)
    data_gen.add_formula_column('CallType', formula=task.task_call_type)
    data_gen.add_formula_column('Status', formula=task.task_status)
    data_gen.add_formula_column('Priority', formula=task.task_priority)

    def create_date_formula(column_values):
        case_create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        case_close_date = datetime.combine(
            dateutil.parser.parse(column_values['ActivityDate']),
            case_create_date.time())
        create_date = fake.date_time_between_dates(case_create_date, case_close_date)
        if create_date > reference_datetime:
            create_date = reference_datetime
        return create_date.isoformat(sep=' ')
    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    data_gen.add_copy_column('LastModifiedDate__c', 'CreatedDate__c')

    def activity_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c']).date()
        return (create_date + timedelta(days=randint(0, 14))).isoformat()
    data_gen.add_formula_column('ActivityDate', activity_date_formula)

    data_gen.add_formula_column('Subject', formula=task.task_subject_simple)

    data_gen.add_map_column('Type', 'Subject', value_map={
        'Call': lambda: choice(['Call', 'Meeting'], p=[.70, .30]),
        'Send Letter': 'Email',
        'Send Quote': 'Email',
        None: lambda: choice(['Meeting', 'Prep', 'Other'], p=[.50, .25, .25])
    })

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'Case.External_Id__c',
        'CreatedDate__c', 'LastModifiedDate__c', 'ActivityDate', 'Subject',
        'Type', 'TaskSubtype', 'CallDurationInSeconds', 'CallDisposition',
        'CallType', 'CallObject', 'Status', 'Priority', 'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def run(batch_id, source_file_name, product_output_file_name, pricebook_output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['Product2Name__c', 'Product2Family__c']
    data_gen.load_source_file(source_file_name, source_columns)

    # rename columns
    data_gen.rename_column('Product2Name__c', 'Name')
    data_gen.rename_column('Product2Family__c', 'Family')

    # filter out duplicate data
    data_gen.unique()

    # generate product code
    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Product.' + str(data_gen.current_row + 1))
    data_gen.add_copy_column('ProductCode', 'External_Id__c')

    # apply transformations and write Product2 file
    data_gen.apply_transformations()
    data_gen.write(product_output_file_name)

    # generate pricebook entry code
    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_PricebookEntry.' + str(data_gen.current_row + 1))

    # generate product id reference
    data_gen.add_copy_column('Product2.External_Id__c', 'ProductCode')

    # get map of product names to opportunity amounts
    shape_dataset = data_gen.load_dataset('shape', source_file_name, ['Product2Name__c', 'Amount'])
    amounts_by_product_name = shape_dataset.group_by('Product2Name__c')

    # generate unit price
    def unit_price_formula(column_values):
        # find average opportunity amount for product
        product_name = column_values['Name']
        amounts = amounts_by_product_name[product_name]
        avg_amount = 0
        count = 0
        for amount in amounts:
            amount = int(amount['Amount'])
            if amount > 0:
                count += 1
                avg_amount += amount
        avg_amount = avg_amount / count
        random_quantity = randint(1, 100)
        return int(avg_amount / random_quantity)
    data_gen.add_formula_column('UnitPrice', formula=unit_price_formula)

    data_gen.add_constant_column('IsActive', 'true')
    data_gen.add_constant_column('Pricebook2.Name', 'Standard Price Book')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write PricebookEntry file
    data_gen.apply_transformations()
    data_gen.write(pricebook_output_file_name, [
        'External_Id__c', 'Product2.External_Id__c', 'IsActive',
        'Pricebook2.Name', 'UnitPrice', 'analyticsdemo_batch_id__c'
    ])
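A hedged invocation sketch for the product/pricebook generator above: one shape file drives both outputs, with the PricebookEntry rows keyed back to the Product2 rows through the copied `ProductCode`. The file names and batch id below are placeholders, not values from the source.

# Placeholder file names and batch id; only the run() signature is from the code above.
run('demo-batch-001',
    'opportunity_shape.csv',      # source with Product2Name__c / Product2Family__c / Amount
    'Product2.csv',               # Product2 output
    'PricebookEntry.csv')         # PricebookEntry output, linked via Product2.External_Id__c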