def create_date_formula(column_values):
    """Return a random creation timestamp bounded by the row's own dates.

    The lower bound is the parsed ``CreatedDate__c`` value. The upper bound
    takes its calendar date from ``ActivityDate`` and its time of day from
    ``CreatedDate__c``. A timestamp is drawn between the two bounds and then
    capped at ``reference_datetime``.

    Args:
        column_values: Row values keyed by column name; must contain
            ``CreatedDate__c`` and ``ActivityDate``.

    Returns:
        The chosen timestamp as an ISO string with a space between the date
        and time parts.
    """
    lower_bound = dateutil.parser.parse(column_values['CreatedDate__c'])
    activity_day = dateutil.parser.parse(column_values['ActivityDate'])
    # Keep the activity's calendar date but reuse the creation time of day.
    upper_bound = datetime.combine(activity_day, lower_bound.time())

    picked = fake.date_time_between_dates(lower_bound, upper_bound)
    # Never report a timestamp later than the reference point.
    capped = reference_datetime if picked > reference_datetime else picked
    return capped.isoformat(sep=' ')
def create_date_formula(column_values):
    """Return a random timestamp between creation and last activity.

    A timestamp is drawn between the parsed ``CreatedDate__c`` and
    ``ActivityDate`` values and then capped at ``reference_datetime``.

    Args:
        column_values: Row values keyed by column name; must contain
            ``CreatedDate__c`` and ``ActivityDate``.

    Returns:
        The chosen timestamp as an ISO string with a space between the date
        and time parts.
    """
    earliest = dateutil.parser.parse(column_values['CreatedDate__c'])
    latest = dateutil.parser.parse(column_values['ActivityDate'])

    picked = fake.date_time_between_dates(earliest, latest)
    # Cap the result so it never lies beyond the reference point.
    capped = reference_datetime if picked > reference_datetime else picked
    return capped.isoformat(sep=' ')
def created_date_formula(column_values):
    """Return a random timestamp in the first half of the open period.

    The period runs from the parsed ``CreatedDate__c`` to the parsed
    ``ClosedDate__c``, with the closing bound limited to
    ``reference_datetime``. The result is drawn between the start of that
    period and its midpoint.

    Args:
        column_values: Row values keyed by column name; must contain
            ``CreatedDate__c`` and ``ClosedDate__c``.

    Returns:
        The chosen timestamp as an ISO string with a space between the date
        and time parts.
    """
    opened_at = dateutil.parser.parse(column_values['CreatedDate__c'])
    closed_at = dateutil.parser.parse(column_values['ClosedDate__c'])

    # A close date past the reference point is treated as the reference point.
    effective_close = (
        reference_datetime if closed_at > reference_datetime else closed_at)
    halfway = opened_at + (effective_close - opened_at) / 2

    picked = fake.date_time_between_dates(opened_at, halfway)
    return picked.isoformat(sep=' ')
def last_activity_date_formula(column_values):
    """Return a random last-activity date between creation and close.

    Both bounds come from ``get_create_date`` and ``get_close_date`` and are
    individually limited to ``reference_date`` before a timestamp is drawn
    between them.

    Args:
        column_values: Row values keyed by column name, passed through to the
            two date helpers.

    Returns:
        The date part of the randomly drawn timestamp.
    """
    opened_at = get_create_date(column_values)
    closed_at = get_close_date(column_values)

    # Neither bound may lie beyond the reference point.
    upper_bound = reference_date if closed_at > reference_date else closed_at
    lower_bound = reference_date if opened_at > reference_date else opened_at

    picked = fake.date_time_between_dates(lower_bound, upper_bound)
    return picked.date()
def last_activity_date_formula(column_values):
    """Return a random last-activity date between creation and close.

    The parsed ``CreatedDate__c`` and ``CloseDate`` values are each limited
    to ``today_datetime`` before a timestamp is drawn between them.

    Args:
        column_values: Row values keyed by column name; must contain
            ``CreatedDate__c`` and ``CloseDate``.

    Returns:
        The date part of the randomly drawn timestamp.
    """
    opened_at = dateutil.parser.parse(column_values['CreatedDate__c'])
    closed_at = dateutil.parser.parse(column_values['CloseDate'])

    # Clamp the close bound first, then the create bound, to today.
    upper_bound, lower_bound = (
        today_datetime if moment > today_datetime else moment
        for moment in (closed_at, opened_at)
    )
    return fake.date_time_between_dates(lower_bound, upper_bound).date()
def run(batch_id, source_file_name, output_file_name):
    """Expand every live chat transcript row into its sequence of chat events.

    Each source row always yields a ``ChatRequest`` event at the transcript's
    creation time. What follows depends on the row:

    * ``Status__c == 'Missed'``: either a single ``CancelVisitor`` event at
      the end time, or ``ChoiceRoute`` -> ``CancelNoAgent`` (each at a random
      later time before the end) followed by a visitor leave/end event at
      the end time.
    * any other status: ``ChoiceRoute`` at creation time, ``Accept`` at a
      random time before the end, then a leave/end event at the end time
      attributed to the visitor when ``EndedBy__c == 'Visitor'`` and to the
      agent otherwise.

    Event rows get ids ``W_LiveChatTranscriptEvent.<n>`` numbered from 1 in
    the order they are generated. Source rows are consumed from the end of
    ``data_gen.rows``.

    Args:
        batch_id: Value written to ``analyticsdemo_batch_id__c`` on every row.
        source_file_name: File handed to ``DataGenerator.load_source_file``.
        output_file_name: File handed to ``DataGenerator.write``.
    """
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c',
        'EndTime__c', 'EndedBy__c', 'Status__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('Owner.External_Id__c', 'Agent.External_Id__c')
    data_gen.add_copy_column('LiveChatTranscript.External_Id__c',
                             'External_Id__c')
    data_gen.add_copy_column('Time__c', 'CreatedDate__c')
    data_gen.add_constant_column('Type__c', '')
    data_gen.add_constant_column('Detail__c', '')

    # tag every row with the id of the batch that created it
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    # Single source of truth for the human-readable text of each event type.
    type_detail_map = {
        "ChatRequest": "Visitor requested chat.",
        "ChoiceRoute": "Choice chat request routed to all available qualified agents.",
        "CancelNoAgent": "Chat request canceled because no qualifying agents were available.",
        "Accept": "Chat request accepted by agent.",
        "CancelVisitor": "Visitor clicked Cancel Chat.",
        "LeaveAgent": "Agent left chat.",
        "EndAgent": "Agent clicked End Chat.",
        "LeaveVisitor": "Visitor left chat.",
        "EndVisitor": "Visitor clicked End Chat."
    }

    new_rows = []

    def add_event(live_chat, agent, event_type, event_time):
        """Append one event row for the given transcript to ``new_rows``."""
        timestamp = event_time.isoformat(sep=' ')
        new_rows.append(data_gen.column_values_to_row({
            # One id per emitted row, so the next id is always "rows so far + 1".
            'External_Id__c':
                'W_LiveChatTranscriptEvent.' + str(len(new_rows) + 1),
            'LiveChatTranscript.External_Id__c': live_chat,
            'Agent.External_Id__c': agent,
            'CreatedDate__c': timestamp,
            'Time__c': timestamp,
            'Type__c': event_type,
            'Detail__c': type_detail_map[event_type],
            'analyticsdemo_batch_id__c': batch_id
        }))

    for _ in range(len(data_gen.rows)):
        column_values = data_gen.row_to_column_values(data_gen.rows.pop())

        live_chat = column_values['LiveChatTranscript.External_Id__c']
        agent = column_values['Agent.External_Id__c']
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        end_date = dateutil.parser.parse(column_values['EndTime__c'])
        ended_by = column_values['EndedBy__c']
        status = column_values['Status__c']

        # every transcript starts with the visitor's chat request
        add_event(live_chat, agent, 'ChatRequest', create_date)

        if status == 'Missed':
            cancel_type = choice(['CancelVisitor', 'CancelNoAgent'])
            if cancel_type == 'CancelNoAgent':
                # no agents: the request is routed, then cancelled, each at a
                # later random moment, and finally the visitor leaves
                create_date = fake.date_time_between_dates(
                    create_date, end_date)
                add_event(live_chat, agent, 'ChoiceRoute', create_date)

                create_date = fake.date_time_between_dates(
                    create_date, end_date)
                add_event(live_chat, agent, cancel_type, create_date)

                add_event(live_chat, agent,
                          choice(['LeaveVisitor', 'EndVisitor']), end_date)
            else:
                # visitor canceled
                add_event(live_chat, agent, cancel_type, end_date)
        else:
            add_event(live_chat, agent, 'ChoiceRoute', create_date)

            create_date = fake.date_time_between_dates(create_date, end_date)
            add_event(live_chat, agent, 'Accept', create_date)

            # the closing event is attributed to whoever ended the chat
            if ended_by == 'Visitor':
                closing_types = ['LeaveVisitor', 'EndVisitor']
            else:
                closing_types = ['LeaveAgent', 'EndAgent']
            add_event(live_chat, agent, choice(closing_types), end_date)

    data_gen.rows = new_rows

    # write file
    output_columns = [
        'External_Id__c', 'LiveChatTranscript.External_Id__c',
        'Agent.External_Id__c', 'Type__c', 'Detail__c', 'CreatedDate__c',
        'Time__c', 'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def run(batch_id, source_file_name, output_file_name, reference_date=today_datetime):
    """Generate opportunity history rows by walking each opportunity backwards.

    For every source opportunity one "final state" row is emitted first, then
    up to ``SalesStageCount__c`` earlier rows are derived from it in reverse
    chronological order, ending with an 'Initial State' row dated at the
    opportunity's create date. The collected rows are reversed before they
    are written.

    Args:
        batch_id: Value written to ``analyticsdemo_batch_id__c`` on every row.
        source_file_name: File handed to ``DataGenerator.load_source_file``.
        output_file_name: File handed to ``DataGenerator.write``.
        reference_date: Latest moment allowed for the most recent event;
            defaults to ``today_datetime``.
    """
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'StageName', 'Amount', 'ForecastCategory',
        'CloseDate', 'CreatedDate__c', 'SalesStageCount__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_columns({
        'StageName': 'StageName__c',
        'Amount': 'Amount__c',
        'ForecastCategory': 'ForecastCategory__c',
        'CloseDate': 'CloseDate__c'
    })

    data_gen.add_copy_column('Opportunity.External_Id__c', 'External_Id__c')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    # open stages, in the order the backwards walk steps through them
    stages = ['Qualification', 'Discovery', 'Proposal/Quote', 'Negotiation']
    forecast_categories = ['BestCase', 'Pipeline', 'Commit']

    # Candidate events and their selection probabilities. Each *_ratio list
    # lines up index-by-index with its bucket and sums to 1.
    pipe_bucket = [
        'No Change', 'Reopen', 'Expand', 'Reduce', 'Moved Out', 'Moved In',
        'Stage Change'
    ]
    pipe_bucket_ratio = [0.10, 0.05, 0.15, 0.15, 0.30, 0.10, 0.15]
    # used when the later state is already 'Qualification' (no 'Stage Change')
    qualification_pipe_bucket = [
        'No Change', 'Reopen', 'Expand', 'Reduce', 'Moved Out', 'Moved In'
    ]
    qualification_pipe_bucket_ratio = [0.20, 0.05, 0.20, 0.10, 0.35, 0.10]
    # used when the later amount is <= 0 (no 'Expand' / 'Reduce')
    zero_amount_pipe_bucket = [
        'No Change', 'Reopen', 'Moved Out', 'Moved In', 'Stage Change'
    ]
    zero_amount_pipe_bucket_ratio = [0.20, 0.05, 0.35, 0.10, 0.30]

    # running suffix for the generated 'W_OpportunityHistory.<n>' ids
    current_count = 1
    new_rows = []
    row_count = len(data_gen.rows)
    for i in range(row_count):
        # rows are consumed from the end of the list
        row = data_gen.rows.pop()
        column_values = data_gen.row_to_column_values(row)

        opportunity_id = column_values['Opportunity.External_Id__c']
        close_date = dateutil.parser.parse(column_values['CloseDate__c'])
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        final_amount = int(column_values['Amount__c'])
        final_forecast_category = column_values['ForecastCategory__c']
        final_stage_name = column_values['StageName__c']
        stage_count = int(column_values['SalesStageCount__c'])

        # initialize most recent event date to reference_date or earlier
        # (the range is the second half of create_date..close_date, or of
        # create_date..reference_date when the close date lies beyond it)
        event_date_range_start = create_date + (close_date - create_date) / 2
        event_date_range_end = close_date

        if close_date > reference_date:
            event_date_range_end = reference_date
            event_date_range_start = create_date + (reference_date - create_date) / 2

        # ensure event happens on or after opportunity create_date
        event_date = fake.date_time_between_dates(event_date_range_start, event_date_range_end)

        # create final state
        # NOTE(review): event_date is stored as a datetime object here, while
        # the rows built further down store an ISO string - confirm the
        # writer serialises both the same way.
        column_values['CreatedDate__c'] = event_date
        column_values['External_Id__c'] = 'W_OpportunityHistory.' + str(current_count)
        current_count += 1
        new_rows.append(data_gen.column_values_to_row(column_values))

        # "next_*" holds the state of the chronologically later row that was
        # emitted last; "curr_*" below is the earlier row being derived.
        # next_create_date is not read again within this function.
        next_create_date = event_date
        next_stage_name = final_stage_name
        next_forecast_category = final_forecast_category
        next_close_date = close_date
        next_amount = final_amount

        # Each of these event kinds changes the state at most once per
        # opportunity; `initialized` is set below but not read in this
        # function.
        movedOut = False
        movedIn = False
        expand = False
        reduce = False
        reopen = False
        initialized = False

        # generate events in reverse order until create_date
        for current_stage_count in range(stage_count):
            # choose the proper bucket depending on the scenario
            bucket = pipe_bucket
            ratio = pipe_bucket_ratio
            if next_amount <= 0:
                bucket = zero_amount_pipe_bucket
                ratio = zero_amount_pipe_bucket_ratio
            elif next_stage_name == 'Qualification':
                bucket = qualification_pipe_bucket
                ratio = qualification_pipe_bucket_ratio

            event = choice(bucket, p=ratio)

            # the earlier event is drawn from the second half of the span
            # between create_date and the previously generated event
            event_date_range_end = event_date
            event_date_range_start = create_date + (event_date - create_date) / 2
            event_date = fake.date_time_between_dates(event_date_range_start, event_date_range_end)

            # if next stage is closed, make the previous event a stage change
            if 'Closed' in next_stage_name:
                event = 'Stage Change'

            # if the event date is the create date, create the initial state
            # (this is the last iteration, which overrides the drawn event)
            if current_stage_count == stage_count - 1:
                event_date = create_date
                event = 'Initial State'

            # 'No Change' emits no row and leaves the next_* state untouched
            if event != 'No Change':
                curr_close_date = next_close_date
                curr_amount = next_amount
                curr_stage_name = next_stage_name
                curr_forecast_category = next_forecast_category

                # When a one-shot event repeats (its flag is already set) no
                # branch below matches, and a row with the unchanged state is
                # still emitted.
                if event == 'Reopen' and not reopen:
                    curr_stage_name = 'Closed Lost'
                    curr_forecast_category = 'Omitted'
                    reopen = True
                elif event == 'Initial State':
                    curr_stage_name = 'Qualification'
                    curr_forecast_category = 'Pipeline'
                    initialized = True
                elif event == 'Expand' and not expand:
                    # walking backwards: before an expansion the amount was
                    # lower by 15-45% of the final amount
                    curr_amount = next_amount - int(
                        uniform(.15, .45) * final_amount)
                    if curr_amount <= 0:
                        # reduce instead
                        curr_amount = next_amount + int(
                            uniform(.15, .45) * final_amount)
                    expand = True
                elif event == 'Reduce' and not reduce:
                    # before a reduction the amount was higher
                    curr_amount = next_amount + int(
                        uniform(.15, .45) * final_amount)
                    reduce = True
                elif event == 'Moved In' and not movedIn:
                    # before being moved in, the close date was later
                    curr_close_date = curr_close_date + timedelta(
                        days=randint(0, 30))
                    movedIn = True
                elif event == 'Moved Out' and not movedOut:
                    # before being moved out, the close date was earlier
                    curr_close_date = curr_close_date - timedelta(
                        days=randint(30, 90))
                    movedOut = True
                elif event == 'Stage Change':
                    # if next stage is not closed, use previous stage
                    # (only taken when the next stage sits at index 2 or
                    # higher in `stages`; index() raises ValueError for a
                    # non-closed stage name that is not in `stages`)
                    if 'Closed' not in next_stage_name and stages.index(
                            next_stage_name) - 1 > 0:
                        curr_stage_name = stages[stages.index(next_stage_name) - 1]
                    # if next stage is closed, use any stage
                    # NOTE(review): index 0 is never drawn here, and whether
                    # the upper bound is inclusive depends on which `randint`
                    # is imported (random vs numpy.random) - confirm.
                    elif 'Closed' in next_stage_name:
                        curr_stage_name = stages[randint(1, len(stages) - 1)]
                    else:
                        curr_stage_name = stages[0]
                    # NOTE(review): same inclusive/exclusive question for the
                    # last forecast category - confirm.
                    curr_forecast_category = forecast_categories[randint(
                        0, len(forecast_categories) - 1)]

                new_column_values = {
                    'External_Id__c': 'W_OpportunityHistory.' + str(current_count),
                    'Opportunity.External_Id__c': opportunity_id,
                    'StageName__c': curr_stage_name,
                    'Amount__c': curr_amount,
                    'ForecastCategory__c': curr_forecast_category,
                    'CreatedDate__c': event_date.isoformat(sep=' '),
                    'CloseDate__c': curr_close_date.date().isoformat(),
                    'analyticsdemo_batch_id__c': batch_id
                }
                current_count += 1
                new_rows.append(
                    data_gen.column_values_to_row(new_column_values))

                # the row just emitted becomes the "later" state for the
                # next step backwards
                next_stage_name = curr_stage_name
                next_forecast_category = curr_forecast_category
                next_close_date = curr_close_date
                next_amount = curr_amount

    data_gen.rows = new_rows
    # rows were collected newest-first per opportunity; flip the whole list
    data_gen.reverse()

    data_gen.write(output_file_name, [
        'External_Id__c', 'Amount__c', 'StageName__c', 'ForecastCategory__c',
        'CloseDate__c', 'CreatedDate__c', 'Opportunity.External_Id__c',
        'analyticsdemo_batch_id__c'
    ])
def run(batch_id, source_file_name, output_file_name, reference_date=today_datetime):
    """Generate user service presence rows from a file of users.

    Only ``External_Id__c`` is read from the source; it becomes both
    ``User.External_Id__c`` and ``Owner.External_Id__c``. The rows are then
    duplicated and every remaining column is filled by a formula: random
    durations and capacities, a creation timestamp within the 365 days
    before ``reference_date``, random true/false flags, and a status end
    time computed from the status start time plus the status duration.

    Args:
        batch_id: Value written to ``analyticsdemo_batch_id__c`` on every row.
        source_file_name: File handed to ``DataGenerator.load_source_file``.
        output_file_name: File handed to ``DataGenerator.write``.
        reference_date: End of the one-year window that creation timestamps
            are drawn from; defaults to ``today_datetime``.
    """
    generator = DataGenerator()

    def random_int_between(low, high):
        # Build a zero-argument formula returning randint(low, high).
        return lambda: randint(low, high)

    def next_external_id():
        # Row ids are derived from the generator's current row position.
        return 'W_UserServicePresence.' + str(generator.current_row + 1)

    def status_end_date_formula(column_values):
        # End time = status start time + status duration in seconds.
        began_at = dateutil.parser.parse(column_values['StatusStartDate__c'])
        seconds = int(column_values['StatusDuration__c'])
        finished_at = began_at + timedelta(seconds=seconds)
        return finished_at.isoformat(sep=' ')

    # read the user ids and expose them under both relationship columns
    generator.load_source_file(source_file_name, ['External_Id__c'])
    generator.rename_column('External_Id__c', 'User.External_Id__c')
    generator.add_copy_column('Owner.External_Id__c', 'User.External_Id__c')

    # duplication factor is drawn from normal(60, 10) and truncated to int
    generator.duplicate_rows(duplication_factor=lambda: int(normal(60, 10)))

    generator.add_formula_column('External_Id__c', next_external_id)

    generator.add_formula_column('AtCapacityDuration__c',
                                 random_int_between(30, 900))
    generator.add_formula_column('AverageCapacity__c',
                                 random_int_between(30, 500))
    generator.add_formula_column('ConfiguredCapacity__c',
                                 random_int_between(30, 600))

    # creation timestamps fall in the 365 days leading up to reference_date
    window_start = reference_date - timedelta(days=365)
    window_end = reference_date
    generator.add_formula_column(
        'CreatedDate__c',
        lambda: fake.date_time_between_dates(
            window_start, window_end).isoformat(sep=' '))

    generator.add_formula_column('IdleDuration__c',
                                 random_int_between(30, 600))
    generator.add_formula_column('IsCurrentState__c',
                                 lambda: choice(['true', 'false']))
    generator.add_formula_column('IsAway__c',
                                 lambda: choice(['true', 'false']))
    generator.add_formula_column('StatusDuration__c',
                                 random_int_between(30, 900))

    generator.add_copy_column('StatusStartDate__c', 'CreatedDate__c')
    generator.add_formula_column('StatusEndDate__c',
                                 formula=status_end_date_formula)

    presence_statuses = [
        'Busy', 'Online', 'Available_Live_Agent', 'Busy_Break', 'Busy_Lunch',
        'Busy_Training', 'Available_LiveMessage'
    ]
    generator.add_formula_column('ServicePresenceStatus.DeveloperName',
                                 presence_statuses)

    # tag every row with the id of the batch that created it
    generator.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    generator.apply_transformations()

    generator.write(output_file_name, [
        'External_Id__c', 'User.External_Id__c', 'Owner.External_Id__c',
        'AtCapacityDuration__c', 'AverageCapacity__c',
        'ConfiguredCapacity__c', 'CreatedDate__c', 'IdleDuration__c',
        'IsAway__c', 'IsCurrentState__c', 'StatusDuration__c',
        'StatusStartDate__c', 'StatusEndDate__c',
        'ServicePresenceStatus.DeveloperName', 'analyticsdemo_batch_id__c'
    ])