def run(batch_id, source_file_name, output_file_name, source_service_resources, delta=timedelta(days=14)):
    """Shift appointment Start/End dates and map resources to external ids.

    Reads the source file, moves the 'Start' and 'End' timestamps forward
    by ``delta.days - 1`` days (dropping any timezone info), resolves
    'ResourceId' to 'Resource.External_Id__c' via the service-resources
    dataset, copies 'Start' into 'CreatedDate__c', and writes the result.

    Args:
        batch_id: batch identifier (currently unused by this generator --
            kept for signature consistency with the other run() functions).
        source_file_name: input file to load.
        output_file_name: destination file for the transformed rows.
        source_service_resources: dataset file mapping service-resource
            'Id' to 'External_ID__c'.
        delta: time window; rows are shifted by (delta.days - 1) days.
    """
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    def _shift_date_formula(column_name):
        # Build a formula that shifts a parsed datetime forward by
        # (delta - 1) days and strips tzinfo; empty values pass through.
        offset = timedelta(days=delta.days - 1)

        def formula(cv):
            value = cv[column_name]
            if value == "":
                return ""
            return (dateutil.parser.parse(value) + offset).replace(tzinfo=None)

        return formula

    data_gen.add_formula_column('Start', _shift_date_formula('Start'))
    data_gen.add_formula_column('End', _shift_date_formula('End'))

    # resolve ResourceId -> external id using the service resources dataset
    service_resources = data_gen.load_dataset("ServiceResources", source_service_resources, ['Id', 'External_ID__c']).dict('Id', 'External_ID__c')

    data_gen.add_map_column('Resource.External_Id__c', 'ResourceId', service_resources)

    data_gen.apply_transformations()

    # CreatedDate__c mirrors the (already shifted) Start value, so this copy
    # must happen in a second transformation pass
    data_gen.add_copy_column('CreatedDate__c', 'Start')

    data_gen.apply_transformations()

    data_gen.write(output_file_name, columns=[
        'External_ID__c',
        'Resource.External_Id__c',
        'CreatedDate__c',
        'Start',
        'End',
        'Type',
        #'State',
        #'Country',
        #'City'
    ])
def run(batch_id, source_file_name, output_file_name):
    """Generate W_LiveChatTranscriptEvent rows from chat transcripts.

    For every transcript row a lifecycle of events is emitted:

    * always a 'ChatRequest' at the transcript create date;
    * for status 'Missed': either a visitor cancel at the end date, or a
      route + no-agent cancel (at random intermediate times) followed by a
      visitor leave/end event at the end date;
    * otherwise: a 'ChoiceRoute', an 'Accept' at a random intermediate
      time, and a closing leave/end event attributed to whoever ended the
      chat ('EndedBy__c').

    Args:
        batch_id: UUID tag written on every generated row.
        source_file_name: chat transcript source file.
        output_file_name: destination file for the event rows.
    """
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c',
        'EndTime__c', 'EndedBy__c', 'Status__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('Owner.External_Id__c', 'Agent.External_Id__c')

    data_gen.add_copy_column('LiveChatTranscript.External_Id__c',
                             'External_Id__c')
    data_gen.add_copy_column('Time__c', 'CreatedDate__c')

    data_gen.add_constant_column('Type__c', '')
    data_gen.add_constant_column('Detail__c', '')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    # detail text for each event type emitted below
    type_detail_map = {
        "ChatRequest": "Visitor requested chat.",
        "ChoiceRoute":
        "Choice chat request routed to all available qualified agents.",
        "CancelNoAgent":
        "Chat request canceled because no qualifying agents were available.",
        "Accept": "Chat request accepted by agent.",
        "CancelVisitor": "Visitor clicked Cancel Chat.",
        "LeaveAgent": "Agent left chat.",
        "EndAgent": "Agent clicked End Chat.",
        "LeaveVisitor": "Visitor left chat.",
        "EndVisitor": "Visitor clicked End Chat."
    }

    current_count = 1
    new_rows = []

    def append_event(live_chat, agent, event_date, type__c):
        # Emit one event row with a sequential external id; the detail text
        # always comes from type_detail_map.
        nonlocal current_count
        new_rows.append(data_gen.column_values_to_row({
            'External_Id__c':
            'W_LiveChatTranscriptEvent.' + str(current_count),
            'LiveChatTranscript.External_Id__c': live_chat,
            'Agent.External_Id__c': agent,
            'CreatedDate__c': event_date.isoformat(sep=' '),
            'Time__c': event_date.isoformat(sep=' '),
            'Type__c': type__c,
            'Detail__c': type_detail_map[type__c],
            'analyticsdemo_batch_id__c': batch_id
        }))
        current_count += 1

    row_count = len(data_gen.rows)
    for i in range(row_count):
        row = data_gen.rows.pop()
        column_values = data_gen.row_to_column_values(row)

        live_chat = column_values['LiveChatTranscript.External_Id__c']
        agent = column_values['Agent.External_Id__c']
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        end_date = dateutil.parser.parse(column_values['EndTime__c'])
        ended_by = column_values['EndedBy__c']
        status = column_values['Status__c']

        # initialize chat request
        append_event(live_chat, agent, create_date, 'ChatRequest')

        if status == 'Missed':
            type__c = choice(['CancelVisitor', 'CancelNoAgent'])
            if type__c == 'CancelNoAgent':
                # no agents: route the request, cancel it, then record the
                # visitor leaving at the end of the transcript
                create_date = fake.date_time_between_dates(
                    create_date, end_date)
                append_event(live_chat, agent, create_date, 'ChoiceRoute')

                create_date = fake.date_time_between_dates(
                    create_date, end_date)
                append_event(live_chat, agent, create_date, type__c)

                append_event(live_chat, agent, end_date,
                             choice(['LeaveVisitor', 'EndVisitor']))
            else:
                # visitor canceled
                append_event(live_chat, agent, end_date, type__c)
        else:
            # handled chat: route, accept at a random intermediate time,
            # then close with an event attributed to whoever ended it
            append_event(live_chat, agent, create_date, 'ChoiceRoute')

            create_date = fake.date_time_between_dates(create_date, end_date)
            append_event(live_chat, agent, create_date, 'Accept')

            if ended_by == 'Visitor':
                append_event(live_chat, agent, end_date,
                             choice(['LeaveVisitor', 'EndVisitor']))
            else:
                append_event(live_chat, agent, end_date,
                             choice(['LeaveAgent', 'EndAgent']))

    data_gen.rows = new_rows

    # apply transformations and write file
    output_columns = [
        'External_Id__c', 'LiveChatTranscript.External_Id__c',
        'Agent.External_Id__c', 'Type__c', 'Detail__c', 'CreatedDate__c',
        'Time__c', 'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
# Example #3
def run(batch_id,
        source_file_name,
        output_file_name,
        reference_date=today_datetime):
    """Generate W_OpportunityHistory rows by replaying each opportunity's
    pipeline backwards in time.

    For every source opportunity the final state is written first, then
    ``SalesStageCount__c - 1`` earlier change events are synthesized in
    reverse chronological order (amount expand/reduce, close-date moves,
    stage changes, reopens) back to an 'Initial State' row at the
    opportunity create date. Rows are reversed at the end so the output
    reads oldest-first.

    Args:
        batch_id: UUID tag stamped on every generated row.
        source_file_name: opportunity source file to read.
        output_file_name: destination file for the history rows.
        reference_date: upper bound for event dates (defaults to now).
    """
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'StageName', 'Amount', 'ForecastCategory',
        'CloseDate', 'CreatedDate__c', 'SalesStageCount__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_columns({
        'StageName': 'StageName__c',
        'Amount': 'Amount__c',
        'ForecastCategory': 'ForecastCategory__c',
        'CloseDate': 'CloseDate__c'
    })

    data_gen.add_copy_column('Opportunity.External_Id__c', 'External_Id__c')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    # open (non-closed) stages used when walking the pipeline backwards
    stages = ['Qualification', 'Discovery', 'Proposal/Quote', 'Negotiation']
    forecast_categories = ['BestCase', 'Pipeline', 'Commit']

    # event buckets and their probabilities; which bucket applies depends
    # on the opportunity's current amount and stage (see loop below)
    pipe_bucket = [
        'No Change', 'Reopen', 'Expand', 'Reduce', 'Moved Out', 'Moved In',
        'Stage Change'
    ]
    pipe_bucket_ratio = [0.10, 0.05, 0.15, 0.15, 0.30, 0.10, 0.15]
    qualification_pipe_bucket = [
        'No Change', 'Reopen', 'Expand', 'Reduce', 'Moved Out', 'Moved In'
    ]
    qualification_pipe_bucket_ratio = [0.20, 0.05, 0.20, 0.10, 0.35, 0.10]
    zero_amount_pipe_bucket = [
        'No Change', 'Reopen', 'Moved Out', 'Moved In', 'Stage Change'
    ]
    zero_amount_pipe_bucket_ratio = [0.20, 0.05, 0.35, 0.10, 0.30]

    current_count = 1
    new_rows = []
    row_count = len(data_gen.rows)
    for i in range(row_count):
        row = data_gen.rows.pop()
        column_values = data_gen.row_to_column_values(row)

        opportunity_id = column_values['Opportunity.External_Id__c']
        close_date = dateutil.parser.parse(column_values['CloseDate__c'])
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        final_amount = int(column_values['Amount__c'])
        final_forecast_category = column_values['ForecastCategory__c']
        final_stage_name = column_values['StageName__c']
        stage_count = int(column_values['SalesStageCount__c'])

        # initialize most recent event date to reference_date or earlier
        event_date_range_start = create_date + (close_date - create_date) / 2
        event_date_range_end = close_date

        if close_date > reference_date:
            event_date_range_end = reference_date
            event_date_range_start = create_date + (reference_date -
                                                    create_date) / 2

        # ensure event happens on or after opportunity create_date
        event_date = fake.date_time_between_dates(event_date_range_start,
                                                  event_date_range_end)

        # create final state (the opportunity's current values, re-dated)
        column_values['CreatedDate__c'] = event_date
        column_values['External_Id__c'] = 'W_OpportunityHistory.' + str(
            current_count)
        current_count += 1
        new_rows.append(data_gen.column_values_to_row(column_values))

        # "next_*" always holds the state of the chronologically LATER
        # event; each loop iteration derives the state just before it
        next_create_date = event_date
        next_stage_name = final_stage_name
        next_forecast_category = final_forecast_category
        next_close_date = close_date
        next_amount = final_amount

        # one-shot flags: each special event kind may fire at most once
        movedOut = False
        movedIn = False
        expand = False
        reduce = False
        reopen = False
        initialized = False

        # generate events in reverse order until create_date
        for current_stage_count in range(stage_count):
            # choose the proper bucket depending on the scenario
            bucket = pipe_bucket
            ratio = pipe_bucket_ratio
            if next_amount <= 0:
                bucket = zero_amount_pipe_bucket
                ratio = zero_amount_pipe_bucket_ratio
            elif next_stage_name == 'Qualification':
                bucket = qualification_pipe_bucket
                ratio = qualification_pipe_bucket_ratio

            # NOTE(review): choice(..., p=...) matches numpy.random.choice's
            # signature -- confirm against the file's imports
            event = choice(bucket, p=ratio)

            # walk the event date backwards: each earlier event lands in
            # the half-interval between create_date and the later event
            event_date_range_end = event_date
            event_date_range_start = create_date + (event_date -
                                                    create_date) / 2
            event_date = fake.date_time_between_dates(event_date_range_start,
                                                      event_date_range_end)

            # if next stage is closed, make the previous event a stage change
            if 'Closed' in next_stage_name:
                event = 'Stage Change'

            # if the event date is the create date, create the initial state
            if current_stage_count == stage_count - 1:
                event_date = create_date
                event = 'Initial State'

            if event != 'No Change':
                curr_close_date = next_close_date
                curr_amount = next_amount
                curr_stage_name = next_stage_name
                curr_forecast_category = next_forecast_category

                if event == 'Reopen' and not reopen:
                    # before a reopen the opportunity was Closed Lost
                    curr_stage_name = 'Closed Lost'
                    curr_forecast_category = 'Omitted'
                    reopen = True
                elif event == 'Initial State':
                    # every opportunity starts in Qualification / Pipeline
                    curr_stage_name = 'Qualification'
                    curr_forecast_category = 'Pipeline'
                    initialized = True
                elif event == 'Expand' and not expand:
                    # earlier amount was smaller (the later event expanded it)
                    curr_amount = next_amount - int(
                        uniform(.15, .45) * final_amount)
                    if curr_amount <= 0:
                        # reduce instead
                        curr_amount = next_amount + int(
                            uniform(.15, .45) * final_amount)
                    expand = True
                elif event == 'Reduce' and not reduce:
                    # earlier amount was larger (the later event reduced it)
                    curr_amount = next_amount + int(
                        uniform(.15, .45) * final_amount)
                    reduce = True
                elif event == 'Moved In' and not movedIn:
                    curr_close_date = curr_close_date + timedelta(
                        days=randint(0, 30))
                    movedIn = True
                elif event == 'Moved Out' and not movedOut:
                    curr_close_date = curr_close_date - timedelta(
                        days=randint(30, 90))
                    movedOut = True
                elif event == 'Stage Change':
                    # if next stage is not closed, use previous stage
                    if 'Closed' not in next_stage_name and stages.index(
                            next_stage_name) - 1 > 0:
                        curr_stage_name = stages[stages.index(next_stage_name)
                                                 - 1]
                    # if next stage is closed, use any stage
                    elif 'Closed' in next_stage_name:
                        curr_stage_name = stages[randint(1, len(stages) - 1)]
                    else:
                        curr_stage_name = stages[0]
                    curr_forecast_category = forecast_categories[randint(
                        0,
                        len(forecast_categories) - 1)]

                new_column_values = {
                    'External_Id__c':
                    'W_OpportunityHistory.' + str(current_count),
                    'Opportunity.External_Id__c': opportunity_id,
                    'StageName__c': curr_stage_name,
                    'Amount__c': curr_amount,
                    'ForecastCategory__c': curr_forecast_category,
                    'CreatedDate__c': event_date.isoformat(sep=' '),
                    'CloseDate__c': curr_close_date.date().isoformat(),
                    'analyticsdemo_batch_id__c': batch_id
                }
                current_count += 1
                new_rows.append(
                    data_gen.column_values_to_row(new_column_values))

                # this event's state becomes the "later" state for the
                # next (earlier) iteration
                next_stage_name = curr_stage_name
                next_forecast_category = curr_forecast_category
                next_close_date = curr_close_date
                next_amount = curr_amount

    data_gen.rows = new_rows
    # rows were generated newest-first; flip to chronological order
    data_gen.reverse()

    data_gen.write(output_file_name, [
        'External_Id__c', 'Amount__c', 'StageName__c', 'ForecastCategory__c',
        'CloseDate__c', 'CreatedDate__c', 'Opportunity.External_Id__c',
        'analyticsdemo_batch_id__c'
    ])
# Example #4
def run(batch_id,
        source_file_name,
        output_file_name,
        reference_datetime=today):
    """Generate W_AgentWork rows for chat-origin cases.

    Duplicates each case row 1-5 times, derives a plausible timeline
    (request -> assigned -> accepted -> closed, plus cancel/decline and
    push-timeout fields), keeps only 'Chat' cases created within the last
    60 days before ``reference_datetime``, sorts by request time, and
    writes the result in batches of 6000.

    Args:
        batch_id: UUID tag written on every generated row.
        source_file_name: case source file to load.
        output_file_name: destination file for the agent-work rows.
        reference_datetime: clamp/cutoff datetime (defaults to today).
    """
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    data_gen.rename_column('Owner.External_Id__c', 'User.External_Id__c')

    data_gen.duplicate_rows(duplication_factor=lambda: choice(
        [1, 2, 3, 4, 5], p=[.65, .15, .10, .05, .05]))

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_AgentWork.' + str(data_gen.current_row + 1))

    data_gen.add_copy_column('RequestDateTime__c', 'CreatedDate__c')

    def _parse(column_values, name):
        # shorthand: parse a column's text value into a datetime
        return dateutil.parser.parse(column_values[name])

    def _offset_iso(base, low, high):
        # base datetime plus a random number of seconds, as iso text
        return (base +
                timedelta(seconds=randint(low, high))).isoformat(sep=' ')

    def _created_date(column_values):
        # re-date the row into the first half of the case's open window,
        # clamping the window end at reference_datetime
        opened = _parse(column_values, 'CreatedDate__c')
        closed = _parse(column_values, 'ClosedDate__c')
        if closed > reference_datetime:
            closed = reference_datetime
        midpoint = opened + (closed - opened) / 2
        return fake.date_time_between_dates(opened,
                                            midpoint).isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', _created_date)

    # assigned within two minutes of creation
    data_gen.add_formula_column(
        'AssignedDateTime__c',
        lambda cv: _offset_iso(_parse(cv, 'CreatedDate__c'), 0, 120))

    # accepted 30s-10min after assignment
    data_gen.add_formula_column(
        'AcceptDateTime__c',
        lambda cv: _offset_iso(_parse(cv, 'AssignedDateTime__c'), 30, 600))

    # closed 30s-30min after acceptance
    data_gen.add_formula_column(
        'CloseDateTime__c',
        lambda cv: _offset_iso(_parse(cv, 'AcceptDateTime__c'), 30, 1800))

    def _active_seconds(column_values):
        # whole seconds between accept and close
        delta = (_parse(column_values, 'CloseDateTime__c') -
                 _parse(column_values, 'AcceptDateTime__c'))
        return int(delta.total_seconds())

    data_gen.add_formula_column('ActiveTime__c', _active_seconds)

    data_gen.add_formula_column('AgentCapacityWhenDeclined__c',
                                lambda: randint(30, 1800))

    # cancel/decline both land 30s-10min after assignment
    data_gen.add_formula_column(
        'CancelDateTime__c',
        lambda cv: _offset_iso(_parse(cv, 'AssignedDateTime__c'), 30, 600))

    data_gen.add_formula_column('CapacityPercentage__c',
                                lambda: randint(1, 101))

    data_gen.add_formula_column('CapacityWeight__c', lambda: randint(1, 7))

    data_gen.add_formula_column(
        'DeclineDateTime__c',
        lambda cv: _offset_iso(_parse(cv, 'AssignedDateTime__c'), 30, 600))

    data_gen.add_formula_column('DeclineReason__c', formula=fake.sentence)

    data_gen.add_copy_column('HandleTime__c', 'ActiveTime__c')

    data_gen.add_formula_column('OriginalQueue.DeveloperName', [
        'GeneralQueue', 'InternationalQueue', 'Knowledge_Translations',
        'Social_Queue', 'TargetCampaign', 'Tier1Queue', 'Tier2Queue',
        'Tier3Queue'
    ])

    data_gen.add_formula_column('PushTimeout__c', lambda: randint(0, 100))

    # NOTE: returns a datetime (not iso text), matching the original output
    data_gen.add_formula_column(
        'PushTimeoutDateTime__c',
        lambda cv: _parse(cv, 'CreatedDate__c') +
        timedelta(seconds=cv['PushTimeout__c']))

    data_gen.add_formula_column(
        'ServiceChannel.DeveloperName',
        ['Cases', 'LiveMessage', 'sfdc_liveagent', 'Leads'])

    def _speed_to_answer(column_values):
        # whole seconds between request and accept
        delta = (_parse(column_values, 'AcceptDateTime__c') -
                 _parse(column_values, 'RequestDateTime__c'))
        return int(delta.total_seconds())

    data_gen.add_formula_column('SpeedToAnswer__c', _speed_to_answer)

    data_gen.add_formula_column('Status__c', [
        'Assigned', 'Unavailable', 'Declined', 'Opened', 'Closed',
        'DeclinedOnPushTimeout', 'Canceled'
    ])

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    def _keep_recent_chat(column_values):
        # keep only chat-origin rows created within the last 60 days
        created = _parse(column_values, 'CreatedDate__c')
        cutoff = reference_datetime - timedelta(days=60)
        return column_values['Origin'] == 'Chat' and created >= cutoff

    data_gen.filter(filter_function=_keep_recent_chat)

    data_gen.apply_transformations()

    data_gen.sort_by('RequestDateTime__c')

    output_columns = [
        'External_Id__c', 'RequestDateTime__c', 'CreatedDate__c',
        'AssignedDateTime__c', 'AcceptDateTime__c', 'CloseDateTime__c',
        'ActiveTime__c', 'AgentCapacityWhenDeclined__c', 'CancelDateTime__c',
        'CapacityPercentage__c', 'CapacityWeight__c', 'DeclineDateTime__c',
        'DeclineReason__c', 'HandleTime__c', 'OriginalQueue.DeveloperName',
        'PushTimeout__c', 'PushTimeoutDateTime__c',
        'ServiceChannel.DeveloperName', 'SpeedToAnswer__c', 'Status__c',
        'User.External_Id__c', 'Case.External_Id__c',
        'analyticsdemo_batch_id__c'
    ]
    return data_gen.write(output_file_name, output_columns, 6000)
# Example #5
def run(batch_id, source_file_name, output_file_name, reference_datetime=today):
    """Generate W_Services_Event activity rows from case data.

    Each case is duplicated 0-3 times; every resulting row gets a fake
    event (subject, subtype, call duration) dated between the case's
    create date and its last-activity date, clamped to
    ``reference_datetime``. The activity date falls up to two weeks after
    the event's create date.

    Args:
        batch_id: UUID tag written on every generated row.
        source_file_name: case source file to load.
        output_file_name: destination file for the event rows.
        reference_datetime: upper clamp for generated dates.
    """
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name, [
        'External_Id__c',
        'Owner.External_Id__c',
        'CreatedDate__c',
        'LastActivityDate__c'
    ])

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')

    data_gen.duplicate_rows(duplication_factor=lambda: randint(0, 3))

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Services_Event.' + str(data_gen.current_row + 1))

    data_gen.add_formula_column('Subject', formula=event.event_subject)
    data_gen.add_formula_column('EventSubtype', formula=event.event_subtype)
    data_gen.add_formula_column('DurationInMinutes',
                                formula=event.event_call_duration)

    def created_date_formula(column_values):
        # pick a random moment in the case's lifetime, never later than
        # reference_datetime
        opened = dateutil.parser.parse(column_values['CreatedDate__c'])
        closed = datetime.combine(
            dateutil.parser.parse(column_values['LastActivityDate__c']),
            opened.time())
        closed = min(closed, reference_datetime)
        picked = fake.date_time_between_dates(opened, closed)
        picked = min(picked, reference_datetime)
        return picked.isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', created_date_formula)

    data_gen.add_copy_column('LastModifiedDate__c', 'CreatedDate__c')

    # activity happens up to two weeks after the event's create date
    data_gen.add_formula_column(
        'ActivityDate',
        lambda cv: (dateutil.parser.parse(cv['CreatedDate__c']).date() +
                    timedelta(days=randint(0, 14))).isoformat())

    data_gen.add_formula_column(
        'ActivityDateTime',
        lambda cv: dateutil.parser.parse(cv['ActivityDate']))

    data_gen.add_constant_column('ShowAs', 'Busy')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write
    data_gen.apply_transformations()

    data_gen.write(output_file_name, [
        'External_Id__c',
        'Owner.External_Id__c',
        'Case.External_Id__c',
        'Subject',
        'EventSubtype',
        'DurationInMinutes',
        'ShowAs',
        'CreatedDate__c',
        'LastModifiedDate__c',
        'ActivityDate',
        'ActivityDateTime',
        'analyticsdemo_batch_id__c'
    ])
def run(batch_id,
        source_file_name,
        output_file_name,
        reference_datetime=today):
    """Generate synthetic LiveChatTranscript rows from chat-origin cases.

    Each source case with Origin == 'Chat' becomes 1-5 transcript rows with
    faked chat metadata (durations, response times, browser/platform
    details) and timestamps derived from the case's created/closed dates.
    The output file is sorted by StartTime__c.

    :param batch_id: UUID stamped on every generated row
        (analyticsdemo_batch_id__c)
    :param source_file_name: case CSV providing ids, dates and Origin
    :param output_file_name: destination CSV for the transcript rows
    :param reference_datetime: upper clamp for generated created dates
        (defaults to the module-level `today`)
    """
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c',
        'ClosedDate__c', 'Origin'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    # the case id becomes a foreign key; the case close date bounds the chat
    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    data_gen.rename_column('ClosedDate__c', 'EndTime__c')

    # 1-5 transcripts per case, heavily skewed toward a single transcript
    data_gen.duplicate_rows(duplication_factor=lambda: choice(
        [1, 2, 3, 4, 5], p=[.65, .15, .10, .05, .05]))

    # fresh primary key per generated row
    data_gen.add_formula_column(
        'External_Id__c',
        lambda: 'W_LiveChatTranscript.' + str(data_gen.current_row + 1))

    # randomized chat metrics (seconds)
    data_gen.add_formula_column('Abandoned__c', lambda: randint(1, 300))

    data_gen.add_formula_column('AverageResponseTimeOperator__c',
                                lambda: randint(1, 180))

    data_gen.add_formula_column('AverageResponseTimeVisitor__c',
                                lambda: randint(1, 180))

    data_gen.add_formula_column('Body__c', formula=fake.body)

    data_gen.add_formula_column('Browser__c', formula=fake.browser)

    data_gen.add_constant_column('BrowserLanguage__c', 'en_US')

    data_gen.add_formula_column('ChatDuration__c', lambda: randint(1, 600))

    data_gen.add_formula_column('ChatKey__c', formula=fake.md5)

    data_gen.add_formula_column('IpAddress__c', formula=fake.ipv4)

    data_gen.add_formula_column('LiveChatButton.DeveloperName',
                                ['Public_Website_Chat_Button'])

    data_gen.add_formula_column('Location__c', formula=fake.city)

    data_gen.add_formula_column('MaxResponseTimeOperator__c',
                                lambda: randint(1, 120))

    data_gen.add_formula_column('MaxResponseTimeVisitor__c',
                                lambda: randint(1, 240))

    # zero-padded 8-digit transcript name, e.g. '00000042'
    data_gen.add_formula_column('Name__c',
                                lambda: str(data_gen.current_row + 1).zfill(8))

    data_gen.add_formula_column('OperatorMessageCount__c',
                                lambda: randint(1, 100))

    data_gen.add_formula_column(
        'Platform__c', ['MacOSX', 'iOS', 'Android', 'Windows', 'Unix'])

    # plausible referrer URLs sampled uniformly per row
    referrer = [
        "https://na17.salesforce.com/setup/forcecomHomepage.apexp?setupid=ForceCom&retURL=%2Fui%2Fsupport%2Fservicedesk%2FServiceDeskPage",
        "https://na13.salesforce.com/home/home.jsp",
        "https://sdodemo-main.force.com/partners/servlet/servlet.Integration?lid=01ra0000001VlbA&ic=1",
        "https://sitestudio.na17.force.com/?exitURL=%2F_ui%2Fnetworks%2Fsetup%2FSetupNetworksPage%2Fd",
        "https://mail.google.com/mail/u/0/",
        "https://sdodemo-main.force.com/customers/servlet/servlet.Integration?lid=01ra0000001VlbP&ic=1",
        "https://sdodemo-main.force.com/consumers/servlet/servlet.Integration?lid=01ro0000000EN78&ic=1",
        "https://na17.salesforce.com/servlet/servlet.su?oid=00D300000007EfQ&retURL=%2F0033000000PuxU2&sunetworkuserid=005a000000AuCha&sunetworkid=0DBo0000000Gn4h",
        "https://sdodemo-main.force.com/customers/servlet/servlet.Integration?ic=1&lid=01ra0000001VlbP"
    ]
    data_gen.add_formula_column('ReferrerUri__c', referrer)

    def create_date_formula(column_values):
        """Sample a chat creation time within the case's open interval,
        clamped so it never lands after reference_datetime."""
        case_create_date = dateutil.parser.parse(
            column_values['CreatedDate__c'])
        case_close_date = dateutil.parser.parse(column_values['EndTime__c'])
        create_date = fake.date_time_between_dates(case_create_date,
                                                   case_close_date)
        if create_date > reference_datetime:
            create_date = reference_datetime
        return create_date.isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    def start_time_formula(column_values):
        # chat starts up to 5 minutes after the transcript's created date
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        start_time = create_date + timedelta(seconds=randint(1, 300))
        return start_time.isoformat(sep=' ')

    data_gen.add_formula_column('StartTime__c', start_time_formula)

    def end_time_formula(column_values):
        # chat runs up to 10 minutes after it starts
        create_date = dateutil.parser.parse(column_values['StartTime__c'])
        end_time = create_date + timedelta(seconds=randint(1, 600))
        return end_time.isoformat(sep=' ')

    data_gen.add_formula_column('EndTime__c', end_time_formula)

    data_gen.add_copy_column('RequestTime__c', 'CreatedDate__c')

    # 80% of chats complete, 20% are missed
    data_gen.add_formula_column(
        'Status__c', lambda: choice(['Missed', 'Completed'], p=[.20, .80]))

    # completed chats are ended by either side; the None key presumably maps
    # every other status (i.e. 'Missed') — per add_map_column's semantics
    data_gen.add_map_column('EndedBy__c', 'Status__c', {
        'Completed': ['Visitor', 'Agent'],
        None: 'Visitor'
    })

    data_gen.add_constant_column('SupervisorTranscriptBody__c', '')

    data_gen.add_constant_column('ScreenResolution__c', '')

    data_gen.add_formula_column('UserAgent__c', formula=fake.user_agent)

    data_gen.add_formula_column('VisitorMessageCount__c',
                                lambda: randint(1, 50))

    data_gen.add_formula_column('WaitTime__c', lambda: randint(1, 120))

    def last_referenced_date_formula(column_values):
        # last reference happens up to 5 minutes after creation
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        last_referenced_date = create_date + timedelta(seconds=randint(1, 300))
        return last_referenced_date.isoformat(sep=' ')

    data_gen.add_formula_column('LastReferencedDate__c',
                                last_referenced_date_formula)

    data_gen.add_copy_column('LastViewedDate__c', 'LastReferencedDate__c')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # keep only chat-origin cases
    def filter_func(column_values):
        return column_values['Origin'] == 'Chat'

    data_gen.filter(filter_function=filter_func)

    # apply transformations and write file
    data_gen.apply_transformations()

    data_gen.sort_by('StartTime__c')

    output_columns = [
        'External_Id__c', 'Abandoned__c', 'AverageResponseTimeOperator__c',
        'MaxResponseTimeOperator__c', 'OperatorMessageCount__c', 'Body__c',
        'Browser__c', 'BrowserLanguage__c', 'Case.External_Id__c',
        'ChatDuration__c', 'ChatKey__c', 'CreatedDate__c', 'StartTime__c',
        'EndTime__c', 'EndedBy__c', 'LastReferencedDate__c',
        'LastViewedDate__c', 'LiveChatButton.DeveloperName', 'Location__c',
        'Owner.External_Id__c', 'Platform__c', 'ReferrerUri__c',
        'ScreenResolution__c', 'RequestTime__c', 'Status__c',
        'SupervisorTranscriptBody__c', 'UserAgent__c',
        'AverageResponseTimeVisitor__c', 'IpAddress__c',
        'MaxResponseTimeVisitor__c', 'VisitorMessageCount__c', 'WaitTime__c',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
# Example #7
def run(batch_id, source_file_name, output_file_name, reference_date=today_datetime, filter_function=None):
    """Rebuild opportunity dates/stages so the dataset looks live as of
    ``reference_date``.

    Close dates are derived from per-row offsets against the end of the
    current year, create dates are back-computed from TimeToClose__c, and
    then each row is reclassified:

    - closed on/before reference_date: kept unchanged;
    - open at reference_date (created before it, closing after): age,
      stage, probability and forecast fields are rewritten to mid-pipeline
      values proportional to elapsed time;
    - created after reference_date: dropped entirely.

    :param batch_id: UUID stamped on every row (analyticsdemo_batch_id__c)
    :param source_file_name: opportunity shape CSV
    :param output_file_name: destination CSV
    :param reference_date: "as of" datetime used to classify rows
    :param filter_function: optional row predicate applied before reshaping
    """

    def get_close_date(values):
        return dateutil.parser.parse(values['CloseDate'])

    def get_create_date(values):
        return dateutil.parser.parse(values['CreatedDate__c'])

    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # add an age column (closed rows age exactly their time-to-close)
    data_gen.add_copy_column('Age__c', 'TimeToClose__c')

    # generate a close date
    def close_date_formula(column_values):
        last_day = date(date.today().year, 12, 31)
        offset = column_values['close_date_offset__c']
        # last day of current year - offset
        close_date = last_day - timedelta(days=int(offset))
        return str(close_date)
    data_gen.add_formula_column('CloseDate', close_date_formula)


    # generate a create date: close date minus the row's time-to-close
    def create_date_formula(column_values):
        close_date = dateutil.parser.parse(column_values['CloseDate'])
        offset = column_values['TimeToClose__c']
        create_date = close_date - timedelta(days=int(offset))
        return create_date.isoformat(sep=' ')
    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    # generate last activity date: random instant between create and close,
    # with both endpoints clamped so it never lands after reference_date
    def last_activity_date_formula(column_values):
        create_date = get_create_date(column_values)
        close_date = get_close_date(column_values)
        if close_date > reference_date:
            close_date = reference_date
        if create_date > reference_date:
            create_date = reference_date
        return fake.date_time_between_dates(create_date, close_date).date()
    data_gen.add_formula_column('LastActivityDate__c', formula=last_activity_date_formula)

    data_gen.apply_transformations()

    if filter_function:
        data_gen.filter(filter_function)

    # Reclassify rows against reference_date.  Rows are pop()ed from the end
    # of data_gen.rows, so new_rows is built in reverse source order; the
    # reverse() below restores it.
    new_rows = []
    row_count = len(data_gen.rows)
    for i in range(row_count):
        row = data_gen.rows.pop()
        column_values = data_gen.row_to_column_values(row)

        close_day = get_close_date(column_values)
        create_day = get_create_date(column_values)

        # if close date is before reference date keep it exactly as is
        if close_day <= reference_date:
            new_rows.append(row)

        # if create date is before reference date, but the close date is after reference date
        elif (create_day <= reference_date) and (close_day > reference_date):
            # set age to days elapsed so far (opportunity is still open)
            age = (reference_date - create_day).days
            column_values['Age__c'] = age

            # fraction of the opportunity's lifetime already elapsed
            ttc = float(column_values['TimeToClose__c'])
            pct = age / ttc

            # set IsClosed to blank
            column_values['IsClosed'] = ''

            # set IsWon to blank
            column_values['IsWon'] = ''

            # set a stage name proportional to progress, with random jitter
            stage_name_index = int(floor(pct * 4) + choice([-1, 0, 1], p=[.2, .7, .1]))

            # adjust the stage name index to stay within the 4 open stages
            if stage_name_index < 0:
                stage_name_index = 0
            if stage_name_index > 3:
                stage_name_index = 3

            # stage/probability/forecast lookups come from the shared
            # `definitions` module — assumed aligned with the 4 open stages
            column_values['StageName'] = definitions.stage_name[stage_name_index]

            column_values['Probability'] = definitions.probabilities[stage_name_index]

            column_values['ForecastCategory'] = definitions.forecast_category[choice([1, 2, 4], p=[.625, .25, .125])]

            column_values['ForecastCategoryName'] = definitions.forecast_category_name[column_values['ForecastCategory']]

            column_values['SalesStageCount__c'] = ceil(pct * float(column_values['SalesStageCount__c']))

            new_rows.append(data_gen.column_values_to_row(column_values))

        # rows created after reference_date fall through and are dropped

    data_gen.rows = new_rows
    data_gen.reverse()

    data_gen.write(output_file_name)
# Example #8
def run(batch_id, source_file_name, output_file_name, shape_file_name):
    """Generate one synthetic services Opportunity per unique account/owner
    pair, with dates anchored to the account's rows in the shape dataset.

    :param batch_id: UUID stamped on every row (analyticsdemo_batch_id__c)
    :param source_file_name: CSV providing account id / owner id pairs
    :param output_file_name: destination CSV for the opportunity rows
    :param shape_file_name: CSV with per-account created / last-activity
        dates used to derive opportunity dates
    """
    data_gen = DataGenerator()

    # load source file
    source_columns = ['External_Id__c', 'Owner.External_Id__c']
    data_gen.load_source_file(source_file_name, source_columns)

    # one opportunity per unique (account, owner) pair
    data_gen.unique()

    # rename columns
    data_gen.rename_column('External_Id__c', 'Account.External_Id__c')

    # fresh primary key per generated row
    data_gen.add_formula_column(
        'External_Id__c',
        lambda: 'W_Services_Opportunity.' + str(data_gen.current_row + 1))

    # weighted stage distribution
    stages = [
        'Qualification', 'Needs Analysis', 'Proposal/Quote', 'Negotiation',
        'Closed Won', 'Closed Lost'
    ]
    data_gen.add_formula_column(
        'StageName', lambda: choice(stages, p=[.25, .20, .15, .10, .15, .15]))

    types = ['New Business', 'Add-On Business', 'Services', 'Renewal']
    data_gen.add_formula_column('Type',
                                lambda: choice(types, p=[.45, .27, .18, .10]))

    # product picked uniformly from the catalog
    products = [
        "GC20002", "GC5000 series", "GC10001", "GC50000", "GC1000 series"
    ]
    data_gen.add_formula_column('Products__c', products)

    # amount ~ N(1.4M, 350K) in round thousands
    data_gen.add_formula_column('Amount',
                                lambda: 1000 * int(normal(1400, 350)))

    data_gen.add_formula_column(
        'Name',
        lambda cv: 'New Opportunity [' + str(data_gen.current_row + 1) + ']')

    # load shape data as dataset
    shape_columns = [
        'Account.External_Id__c', 'CreatedDate__c', 'LastActivityDate__c'
    ]
    shape_dataset = data_gen.load_dataset('shape', shape_file_name,
                                          shape_columns)

    # build map of account values
    shape_account_map = shape_dataset.group_by('Account.External_Id__c')

    # generate earliest created date: 1-45 days before the account's
    # earliest shape created date
    def create_date_formula(column_values):
        accounts = shape_account_map.get(
            column_values['Account.External_Id__c'])
        create_dates = [
            dateutil.parser.parse(account['CreatedDate__c'])
            for account in accounts
        ]
        create_dates.sort()
        return (create_dates[0] -
                timedelta(days=randint(1, 45))).isoformat(sep=' ')

    data_gen.add_formula_column('DateTimeCreated__c', create_date_formula)

    # generate last activity date: the account's latest shape activity date
    def last_activity_date_formula(column_values):
        accounts = shape_account_map.get(
            column_values['Account.External_Id__c'])
        activity_dates = [
            dateutil.parser.parse(account['LastActivityDate__c'])
            for account in accounts
        ]
        activity_dates.sort(reverse=True)
        return activity_dates[0].isoformat(sep=' ')

    data_gen.add_formula_column('LastActivityDate__c',
                                last_activity_date_formula)

    # close date mirrors the created date
    data_gen.add_copy_column('CloseDate', 'DateTimeCreated__c')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'Account.External_Id__c',
        'DateTimeCreated__c', 'CloseDate', 'LastActivityDate__c', 'Name',
        'Products__c', 'StageName', 'Amount', 'Type',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def run(batch_id,
        source_file_name,
        output_file_name,
        reference_date=today_datetime):
    """Fabricate UserServicePresence rows — roughly sixty per user —
    spread over the 365 days leading up to ``reference_date``.

    Capacity figures, idle/status durations and presence statuses are
    randomized; each status window starts at its created date and runs
    for StatusDuration__c seconds.
    """
    data_gen = DataGenerator()

    # each source row supplies one user id
    data_gen.load_source_file(source_file_name, ['External_Id__c'])

    data_gen.rename_column('External_Id__c', 'User.External_Id__c')
    data_gen.add_copy_column('Owner.External_Id__c', 'User.External_Id__c')

    # fan each user out to ~N(60, 10) presence records
    data_gen.duplicate_rows(duplication_factor=lambda: int(normal(60, 10)))

    # fresh primary key per generated row
    data_gen.add_formula_column(
        'External_Id__c',
        lambda: 'W_UserServicePresence.' + str(data_gen.current_row + 1))

    # randomized capacity metrics (seconds)
    data_gen.add_formula_column('AtCapacityDuration__c',
                                lambda: randint(30, 900))
    data_gen.add_formula_column('AverageCapacity__c', lambda: randint(30, 500))
    data_gen.add_formula_column('ConfiguredCapacity__c',
                                lambda: randint(30, 600))

    # created dates land somewhere in the year before the reference date
    window_end = reference_date
    window_start = window_end - timedelta(days=365)
    data_gen.add_formula_column(
        'CreatedDate__c', lambda: fake.date_time_between_dates(
            window_start, window_end).isoformat(sep=' '))

    data_gen.add_formula_column('IdleDuration__c', lambda: randint(30, 600))
    data_gen.add_formula_column('IsCurrentState__c',
                                lambda: choice(['true', 'false']))
    data_gen.add_formula_column('IsAway__c', lambda: choice(['true', 'false']))
    data_gen.add_formula_column('StatusDuration__c', lambda: randint(30, 900))

    data_gen.add_copy_column('StatusStartDate__c', 'CreatedDate__c')

    # a status window ends StatusDuration__c seconds after it starts
    def status_end(cv):
        begun = dateutil.parser.parse(cv['StatusStartDate__c'])
        lasted = timedelta(seconds=int(cv['StatusDuration__c']))
        return (begun + lasted).isoformat(sep=' ')

    data_gen.add_formula_column('StatusEndDate__c', formula=status_end)

    # presence status picked uniformly from the configured set
    data_gen.add_formula_column('ServicePresenceStatus.DeveloperName', [
        'Busy', 'Online', 'Available_Live_Agent', 'Busy_Break', 'Busy_Lunch',
        'Busy_Training', 'Available_LiveMessage'
    ])

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c', 'User.External_Id__c', 'Owner.External_Id__c',
        'AtCapacityDuration__c', 'AverageCapacity__c', 'ConfiguredCapacity__c',
        'CreatedDate__c', 'IdleDuration__c', 'IsAway__c', 'IsCurrentState__c',
        'StatusDuration__c', 'StatusStartDate__c', 'StatusEndDate__c',
        'ServicePresenceStatus.DeveloperName', 'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def run(batch_id,
        source_file_name,
        output_file_name,
        source_accounts,
        source_service_resources,
        source_service_territories,
        source_work_orders,
        reference_datetime=today):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    data_gen.add_formula_column(
        'EarliestStartTime',
        lambda cv: dateutil.parser.parse(cv['EarliestStartTime']))

    data_gen.apply_transformations()

    data_gen.sort_by('EarliestStartTime', reverse=True)

    # shift dates to be 2 weeks prior to the reference date
    delta = reference_datetime.date() - data_gen.row_to_column_values(
        data_gen.rows[0])['EarliestStartTime'].date()
    data_gen.add_formula_column(
        'EarliestStartTime', lambda cv: (cv['EarliestStartTime'] + timedelta(
            days=delta.days - 1)).replace(tzinfo=None))

    data_gen.add_formula_column(
        'ActualStartTime', lambda cv: "" if cv['ActualStartTime'] == "" else
        (dateutil.parser.parse(cv['ActualStartTime']) + timedelta(
            days=delta.days - 1)).replace(tzinfo=None))

    data_gen.add_formula_column(
        'ActualEndTime', lambda cv: "" if cv['ActualEndTime'] == "" else
        (dateutil.parser.parse(cv['ActualEndTime']) + timedelta(
            days=delta.days - 1)).replace(tzinfo=None))

    data_gen.add_formula_column(
        'ArrivalWindowStartTime', lambda cv: ""
        if cv['ArrivalWindowStartTime'] == "" else
        (dateutil.parser.parse(cv['ArrivalWindowStartTime']) + timedelta(
            days=delta.days - 1)).replace(tzinfo=None))

    data_gen.add_formula_column(
        'ArrivalWindowEndTime', lambda cv: ""
        if cv['ArrivalWindowEndTime'] == "" else
        (dateutil.parser.parse(cv['ArrivalWindowEndTime']) + timedelta(
            days=delta.days - 1)).replace(tzinfo=None))

    data_gen.add_formula_column(
        'DueDate', lambda cv: ""
        if cv['DueDate'] == "" else (dateutil.parser.parse(cv[
            'DueDate']) + timedelta(days=delta.days - 1)).replace(tzinfo=None))

    data_gen.apply_transformations()

    data_gen.add_copy_column('CreatedDate__c', 'EarliestStartTime')

    accounts = data_gen.load_dataset("Accounts", source_accounts,
                                     ['Id', 'External_ID__c']).dict(
                                         'Id', 'External_ID__c')

    data_gen.add_map_column('Account.External_Id__c', 'AccountId', accounts)

    service_resources = data_gen.load_dataset("ServiceResources",
                                              source_service_resources,
                                              ['Id', 'External_ID__c']).dict(
                                                  'Id', 'External_ID__c')

    data_gen.add_map_column('ServiceResource.External_Id__c',
                            'FSLDemoTools_Service_Resource__c',
                            service_resources)

    service_territories = data_gen.load_dataset("ServiceTerritories",
                                                source_service_territories,
                                                ['Id', 'External_ID__c']).dict(
                                                    'Id', 'External_ID__c')

    data_gen.add_map_column('ServiceTerritory.External_Id__c',
                            'ServiceTerritoryId', service_territories)

    work_orders = data_gen.load_dataset("WorkOrders", source_work_orders,
                                        ['Id', 'External_ID__c']).dict(
                                            'Id', 'External_ID__c')

    data_gen.add_map_column('WorkOrder.External_Id__c', 'ParentRecordId',
                            work_orders)

    data_gen.apply_transformations()

    data_gen.filter(
        lambda cv: cv['WorkOrder.External_Id__c'].startswith('WO.'))

    data_gen.apply_transformations()

    data_gen.write(
        output_file_name,
        columns=[
            'External_ID__c', 'CreatedDate__c',
            'ServiceResource.External_Id__c',
            'ServiceTerritory.External_Id__c', 'WorkOrder.External_Id__c',
            'ActualStartTime', 'ArrivalWindowStartTime', 'ActualDuration',
            'EarliestStartTime', 'Duration', 'DurationType', 'Status',
            'DueDate', 'ActualEndTime', 'ArrivalWindowEndTime'
        ])
    return delta
# Example #11
def run(batch_id, source_file_name, output_file_name, reference_datetime=today):
    """Generate 0-3 synthetic Task rows per source case.

    Task metadata (subtype, call details, status, priority, subject) comes
    from the shared ``task`` faker helpers; created dates are sampled
    within the case's activity window and clamped to ``reference_datetime``.

    :param batch_id: UUID stamped on every row (analyticsdemo_batch_id__c)
    :param source_file_name: case CSV providing ids, dates and Team__c
    :param output_file_name: destination CSV for the task rows
    :param reference_datetime: upper clamp for generated created dates
    """
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c',
        'Owner.External_Id__c',
        'CreatedDate__c',
        'LastActivityDate__c',
        'Team__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    # case id becomes a foreign key; Team__c is repurposed as CallObject
    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    data_gen.rename_column('LastActivityDate__c', 'ActivityDate')
    data_gen.rename_column('Team__c', 'CallObject')


    # generate a random number of tasks per case
    data_gen.duplicate_rows(duplication_factor=lambda: randint(0, 3))


    # fresh primary key per generated row
    data_gen.add_formula_column('External_Id__c', formula=lambda: 'W_Services_Task.' + str(data_gen.current_row + 1))


    # task metadata from the shared `task` faker helpers
    data_gen.add_formula_column('TaskSubtype', formula=task.task_subtype)
    data_gen.add_formula_column('CallDurationInSeconds', formula=task.task_call_duration)
    data_gen.add_formula_column('CallDisposition', formula=task.task_call_disposition)
    data_gen.add_formula_column('CallType', formula=task.task_call_type)


    data_gen.add_formula_column('Status', formula=task.task_status)

    data_gen.add_formula_column('Priority', formula=task.task_priority)

    # Sample a task creation time between the case's created date and its
    # last-activity date (at the case's creation time-of-day), clamping the
    # result to reference_datetime.
    # NOTE(review): unlike the sibling event generator, the close bound
    # itself is NOT pre-clamped to reference_datetime before sampling, so
    # many far-future windows collapse onto exactly reference_datetime —
    # confirm whether that distribution is intended.
    def create_date_formula(column_values):
        case_create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        case_close_date = datetime.combine(dateutil.parser.parse(column_values['ActivityDate']), case_create_date.time())
        create_date = fake.date_time_between_dates(case_create_date, case_close_date)
        if create_date > reference_datetime:
            create_date = reference_datetime
        return create_date.isoformat(sep=' ')
    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    data_gen.add_copy_column('LastModifiedDate__c', 'CreatedDate__c')

    # Overwrite ActivityDate with created date + 0-14 days.  Registration
    # order matters here: create_date_formula above reads the case's
    # original ActivityDate before this formula replaces it (assuming
    # DataGenerator applies formulas in registration order — verify).
    def activity_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c']).date()
        return (create_date + timedelta(days=randint(0, 14))).isoformat()
    data_gen.add_formula_column('ActivityDate', activity_date_formula)


    data_gen.add_formula_column('Subject', formula=task.task_subject_simple)

    # derive Type from Subject; the None key maps all other subjects
    data_gen.add_map_column('Type', 'Subject', value_map={
        'Call': lambda: choice(['Call', 'Meeting'], p=[.70, .30]),
        'Send Letter': 'Email',
        'Send Quote': 'Email',
        None: lambda: choice(['Meeting', 'Prep', 'Other'], p=[.50, .25, .25])
    })

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c',
        'Owner.External_Id__c',
        'Case.External_Id__c',
        'CreatedDate__c',
        'LastModifiedDate__c',
        'ActivityDate',
        'Subject',
        'Type',
        'TaskSubtype',
        'CallDurationInSeconds',
        'CallDisposition',
        'CallType',
        'CallObject',
        'Status',
        'Priority',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
# Example #12
def run(batch_id, source_file_name, product_output_file_name,
        pricebook_output_file_name):
    """Generate Product2 and PricebookEntry CSVs from opportunity shape data.

    The first pass writes one unique Product2 row per (name, family) pair
    found in the source file; the second pass writes a standard-pricebook
    entry per product, priced from the average of the product's positive
    opportunity amounts divided by a random assumed quantity.

    :param batch_id: UUID stamped on each pricebook row
        (analyticsdemo_batch_id__c)
    :param source_file_name: opportunity shape CSV containing
        Product2Name__c, Product2Family__c and Amount columns
    :param product_output_file_name: destination CSV for Product2 rows
    :param pricebook_output_file_name: destination CSV for PricebookEntry rows
    """
    data_gen = DataGenerator()

    # load source file
    source_columns = ['Product2Name__c', 'Product2Family__c']
    data_gen.load_source_file(source_file_name, source_columns)

    # rename columns
    data_gen.rename_column('Product2Name__c', 'Name')
    data_gen.rename_column('Product2Family__c', 'Family')

    # filter out duplicate data
    data_gen.unique()

    # generate product code
    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Product.' + str(data_gen.current_row + 1))

    data_gen.add_copy_column('ProductCode', 'External_Id__c')

    # apply transformations and write Product2 file
    data_gen.apply_transformations()
    data_gen.write(product_output_file_name)

    # generate pricebook entry code (replaces the product external id)
    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_PricebookEntry.' + str(data_gen.current_row + 1))

    # generate product id reference
    data_gen.add_copy_column('Product2.External_Id__c', 'ProductCode')

    # get map of product names to opportunity amounts
    shape_dataset = data_gen.load_dataset('shape', source_file_name,
                                          ['Product2Name__c', 'Amount'])
    amounts_by_product_name = shape_dataset.group_by('Product2Name__c')

    # generate unit price
    def unit_price_formula(column_values):
        """Price = mean of the product's positive opportunity amounts,
        divided by a random assumed quantity of 1-100 units."""
        amounts = amounts_by_product_name[column_values['Name']]
        positive_amounts = [
            int(amount['Amount']) for amount in amounts
            if int(amount['Amount']) > 0
        ]
        # Bug fix: a product whose opportunity amounts are all <= 0 used to
        # divide by a zero count; fall back to a free entry instead.
        if not positive_amounts:
            return 0
        avg_amount = sum(positive_amounts) / len(positive_amounts)
        random_quantity = randint(1, 100)
        return int(avg_amount / random_quantity)

    data_gen.add_formula_column('UnitPrice', formula=unit_price_formula)

    data_gen.add_constant_column('IsActive', 'true')
    data_gen.add_constant_column('Pricebook2.Name', 'Standard Price Book')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write PricebookEntry file
    data_gen.apply_transformations()

    data_gen.write(pricebook_output_file_name, [
        'External_Id__c', 'Product2.External_Id__c', 'IsActive',
        'Pricebook2.Name', 'UnitPrice', 'analyticsdemo_batch_id__c'
    ])