def generate(self, selected_filters=None, columns=None, count=5):
        if selected_filters is None:
            selected_filters = {}
        if columns is None:
            columns = self.get_columns()

        data_gen = DataGenerator()
        data_gen.row_count = count

        if 'gender' in selected_filters:
            if selected_filters['gender'] == 'male':
                data_gen.add_constant_column('Gender', 'Male')
            else:
                data_gen.add_constant_column('Gender', 'Female')
        else:
            data_gen.add_formula_column('Gender', formula=fake.gender)

        def first_name_formula(column_values):
            if column_values['Gender'] == 'Male':
                return fake.first_name_male()
            else:
                return fake.first_name_female()

        data_gen.add_formula_column('First Name', first_name_formula)

        data_gen.add_formula_column('Last Name', formula=fake.last_name)
        data_gen.add_formula_column('Name', lambda cv: cv['First Name'] + ' ' + cv['Last Name'])

        data_gen.apply_transformations()
        return [
            list(data_gen.row_to_column_values(r, columns).values())
            for r in data_gen.rows
        ]
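# Usage sketch (illustrative, not from the original source): assuming `gen` is
# an instance of the class that defines generate() above, a call might look like:
#
#     rows = gen.generate(selected_filters={'gender': 'male'},
#                         columns=['Name', 'Gender'], count=3)
#     for row in rows:
#         print(row)   # e.g. ['John Smith', 'Male']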
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'KnowledgeArticle.External_Id__c', 'User.External_Id__c',
        'CreatedDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('KnowledgeArticle.External_Id__c',
                           'KCSArticle__ka.External_Id__c')
    data_gen.rename_column('User.External_Id__c', 'Owner.External_Id__c')

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_KCSArticleVersion.' + str(data_gen.current_row + 1))

    data_gen.add_formula_column('ArticleNumber__c',
                                lambda: data_gen.current_row + 1)

    data_gen.add_formula_column('PublishStatus__c', ['Archived', 'Online'])

    data_gen.add_constant_column('IsLatestVersion__c', 'true')
    data_gen.add_constant_column('IsVisibleInApp__c', 'true')
    data_gen.add_constant_column('IsVisibleInCsp__c', 'true')
    data_gen.add_constant_column('IsVisibleInPkb__c', 'true')
    data_gen.add_constant_column('IsVisibleInPrm__c', 'true')

    data_gen.add_constant_column('VersionNumber__c', '1')
    data_gen.add_constant_column('Language__c', 'en_US')

    titles = [
        "Health", "Computers", "Music", "Tools", "Home", "Outdoors",
        "Jewelery", "Toys", "Grocery", "Clothing", "Games", "Automotive",
        "Beauty", "Garden", "Books", "Industrial", "Baby", "Kids", "Movies",
        "Sports", "Shoes", "Electronics"
    ]
    data_gen.add_formula_column('Title__c', titles)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c', 'ArticleNumber__c', 'CreatedDate__c',
        'Owner.External_Id__c', 'PublishStatus__c', 'IsLatestVersion__c',
        'IsVisibleInApp__c', 'IsVisibleInCsp__c', 'IsVisibleInPkb__c',
        'IsVisibleInPrm__c', 'KCSArticle__ka.External_Id__c', 'Title__c',
        'VersionNumber__c', 'Language__c', 'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
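# Usage sketch (hypothetical file names; run() above takes a batch UUID plus
# input/output CSV paths):
#
#     run(batch_id=str(uuid.uuid4()),
#         source_file_name='KCSArticle.csv',
#         output_file_name='KCSArticleVersion.csv')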
Example #3
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    data_gen.rename_column('Owner.External_Id__c', 'User.External_Id__c')

    # TODO: decide how many case articles per case (at most one? what distribution?)
    data_gen.duplicate_rows(
        duplication_factor=lambda: choice([0, 1], p=[.75, .25]))

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_CaseArticle.' + str(data_gen.current_row + 1))
    data_gen.add_formula_column(
        'KnowledgeArticle.External_Id__c',
        formula=lambda: 'W_KCSArticle.' + str(data_gen.current_row + 1))

    data_gen.add_constant_column('ArticleVersionNumber__c', 1)

    # pick randomly per row, so use a formula column rather than a constant
    data_gen.add_formula_column('IsSharedByEmail__c', ['true', 'false'])

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c', 'User.External_Id__c', 'ArticleVersionNumber__c',
        'CreatedDate__c', 'KnowledgeArticle.External_Id__c',
        'IsSharedByEmail__c', 'Case.External_Id__c',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def run(batch_id, source_file_name, output_file_name, reference_date=today):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['External_Id__c', 'Name', 'UserRole.Name']
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.filter(lambda cv: 'RVP' not in cv['UserRole.Name'])
    data_gen.filter(
        lambda cv: 'CSM' not in cv['UserRole.Name'])  # comes from Service

    data_gen.rename_column('External_Id__c', 'QuotaOwner_Id__c')
    data_gen.rename_column('Name', 'OwnerName__c')

    # generate id
    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Quota.' + str(data_gen.current_row + 1))

    data_gen.duplicate_rows(24)

    def quota_formula():
        # first month of quarter = 300k
        # second month of quarter = 750k
        # third month of quarter = 500k
        month_in_quarter = data_gen.current_row % 3
        if month_in_quarter == 0:
            return 300000
        elif month_in_quarter == 1:
            return 750000
        else:
            return 500000

    data_gen.add_formula_column('QuotaAmount__c', quota_formula)

    current_year = reference_date.year
    last_year = current_year - 1

    def start_date_formula():
        user_row = data_gen.current_row % 24
        month = str((user_row % 12) + 1).zfill(2)
        day = '01'
        if user_row < 12:
            year = str(last_year)
        else:
            year = str(current_year)
        return dateutil.parser.parse(year + '-' + month + '-' + day).date()

    data_gen.add_formula_column('StartDate__c', start_date_formula)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()
    data_gen.write(output_file_name, [
        'External_Id__c', 'QuotaOwner_Id__c', 'OwnerName__c', 'StartDate__c',
        'QuotaAmount__c'
    ])
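# Worked example of the 24-row fan-out above (assumes reference_date falls in
# 2024, so last_year is 2023); self-contained sketch of start_date_formula:
#
#     for user_row in (0, 11, 12, 23):
#         month = str((user_row % 12) + 1).zfill(2)
#         year = '2023' if user_row < 12 else '2024'
#         print(year + '-' + month + '-01')
#     # -> 2023-01-01, 2023-12-01, 2024-01-01, 2024-12-01
#
# while quota_formula cycles 300k / 750k / 500k by month within each quarter.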
Example #5
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['KnowledgeArticle.External_Id__c', 'CreatedDate__c']
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('KnowledgeArticle.External_Id__c', 'External_Id__c')

    data_gen.add_formula_column('ArticleNumber__c',
                                lambda: data_gen.current_row + 1)

    data_gen.add_formula_column('CaseAssociationCount__c',
                                lambda: randint(1, 6))

    def first_published_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        return (create_date +
                timedelta(days=randint(1, 10))).isoformat(sep=' ')

    data_gen.add_formula_column('FirstPublishedDate__c',
                                formula=first_published_date_formula)

    def last_published_date_formula(column_values):
        first_published_date = dateutil.parser.parse(
            column_values['FirstPublishedDate__c'])
        return (first_published_date +
                timedelta(days=randint(1, 10))).isoformat(sep=' ')

    data_gen.add_formula_column('LastPublishedDate__c',
                                formula=last_published_date_formula)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'ArticleNumber__c', 'External_Id__c', 'CaseAssociationCount__c',
        'CreatedDate__c', 'FirstPublishedDate__c', 'LastPublishedDate__c',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
Example #6
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'KnowledgeArticle.External_Id__c',
        'User.External_Id__c',
        'CreatedDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('KnowledgeArticle.External_Id__c', 'Parent.External_Id__c')
    data_gen.rename_column('User.External_Id__c', 'Owner.External_Id__c')

    data_gen.add_formula_column('External_Id__c', formula=lambda: 'W_KCSArticle_ViewStat.' + str(data_gen.current_row + 1))

    channels = [
        'App',
        'Desktop Site',
        'Mobile Site'
    ]
    data_gen.add_formula_column('Channel__c', channels)

    data_gen.add_formula_column('ViewCount__c', formula=lambda: randint(1, 100))

    data_gen.add_formula_column('NormalizedScore__c', formula=lambda: round(uniform(1, 10), 3))
    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)
    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c',
        'Channel__c',
        'Parent.External_Id__c',
        'ViewCount__c',
        'NormalizedScore__c',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
Example #7
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    account_columns = ['External_Id__c']
    data_gen.load_source_file(source_file_name, account_columns)

    data_gen.rename_column('External_Id__c', 'Account.External_Id__c')
    data_gen.add_formula_column(
        'External_Id__c', lambda cv: cv['Account.External_Id__c'].replace(
            'W_Account', 'W_Contact'))

    data_gen.add_formula_column('FirstName', formula=fake.first_name)
    data_gen.add_formula_column('LastName', formula=fake.last_name)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()
    data_gen.write(output_file_name)
Example #8
def run(batch_id, source_file_name, output_file_name, reference_datetime=today):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c',
        'Owner.External_Id__c',
        'CreatedDate__c',
        'LastActivityDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)


    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')

    data_gen.duplicate_rows(duplication_factor=lambda: randint(0, 3))


    data_gen.add_formula_column('External_Id__c', formula=lambda: 'W_Services_Event.' + str(data_gen.current_row + 1))


    data_gen.add_formula_column('Subject', formula=event.event_subject)
    data_gen.add_formula_column('EventSubtype', formula=event.event_subtype)
    data_gen.add_formula_column('DurationInMinutes', formula=event.event_call_duration)

    def create_date_formula(column_values):
        case_create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        case_close_date = datetime.combine(dateutil.parser.parse(column_values['LastActivityDate__c']), case_create_date.time())
        if case_close_date > reference_datetime:
            case_close_date = reference_datetime
        create_date = fake.date_time_between_dates(case_create_date, case_close_date)
        if create_date > reference_datetime:
            create_date = reference_datetime
        return create_date.isoformat(sep=' ')
    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    data_gen.add_copy_column('LastModifiedDate__c', 'CreatedDate__c')


    def activity_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c']).date()
        return (create_date + timedelta(days=randint(0, 14))).isoformat()
    data_gen.add_formula_column('ActivityDate', activity_date_formula)


    def activity_datetime_formula(column_values):
        return dateutil.parser.parse(column_values['ActivityDate'])
    data_gen.add_formula_column('ActivityDateTime', activity_datetime_formula)


    data_gen.add_constant_column('ShowAs', 'Busy')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c',
        'Owner.External_Id__c',
        'Case.External_Id__c',
        'Subject',
        'EventSubtype',
        'DurationInMinutes',
        'ShowAs',
        'CreatedDate__c',
        'LastModifiedDate__c',
        'ActivityDate',
        'ActivityDateTime',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def run(batch_id,
        source_file_name,
        output_file_name,
        reference_datetime=today):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c',
        'ClosedDate__c', 'Origin'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    data_gen.rename_column('ClosedDate__c', 'EndTime__c')

    data_gen.duplicate_rows(duplication_factor=lambda: choice(
        [1, 2, 3, 4, 5], p=[.65, .15, .10, .05, .05]))

    data_gen.add_formula_column(
        'External_Id__c',
        lambda: 'W_LiveChatTranscript.' + str(data_gen.current_row + 1))

    data_gen.add_formula_column('Abandoned__c', lambda: randint(1, 300))

    data_gen.add_formula_column('AverageResponseTimeOperator__c',
                                lambda: randint(1, 180))

    data_gen.add_formula_column('AverageResponseTimeVisitor__c',
                                lambda: randint(1, 180))

    data_gen.add_formula_column('Body__c', formula=fake.body)

    data_gen.add_formula_column('Browser__c', formula=fake.browser)

    data_gen.add_constant_column('BrowserLanguage__c', 'en_US')

    data_gen.add_formula_column('ChatDuration__c', lambda: randint(1, 600))

    data_gen.add_formula_column('ChatKey__c', formula=fake.md5)

    data_gen.add_formula_column('IpAddress__c', formula=fake.ipv4)

    data_gen.add_formula_column('LiveChatButton.DeveloperName',
                                ['Public_Website_Chat_Button'])

    data_gen.add_formula_column('Location__c', formula=fake.city)

    data_gen.add_formula_column('MaxResponseTimeOperator__c',
                                lambda: randint(1, 120))

    data_gen.add_formula_column('MaxResponseTimeVisitor__c',
                                lambda: randint(1, 240))

    data_gen.add_formula_column('Name__c',
                                lambda: str(data_gen.current_row + 1).zfill(8))

    data_gen.add_formula_column('OperatorMessageCount__c',
                                lambda: randint(1, 100))

    data_gen.add_formula_column(
        'Platform__c', ['MacOSX', 'iOS', 'Android', 'Windows', 'Unix'])

    referrer = [
        "https://na17.salesforce.com/setup/forcecomHomepage.apexp?setupid=ForceCom&retURL=%2Fui%2Fsupport%2Fservicedesk%2FServiceDeskPage",
        "https://na13.salesforce.com/home/home.jsp",
        "https://sdodemo-main.force.com/partners/servlet/servlet.Integration?lid=01ra0000001VlbA&ic=1",
        "https://sitestudio.na17.force.com/?exitURL=%2F_ui%2Fnetworks%2Fsetup%2FSetupNetworksPage%2Fd",
        "https://mail.google.com/mail/u/0/",
        "https://sdodemo-main.force.com/customers/servlet/servlet.Integration?lid=01ra0000001VlbP&ic=1",
        "https://sdodemo-main.force.com/consumers/servlet/servlet.Integration?lid=01ro0000000EN78&ic=1",
        "https://na17.salesforce.com/servlet/servlet.su?oid=00D300000007EfQ&retURL=%2F0033000000PuxU2&sunetworkuserid=005a000000AuCha&sunetworkid=0DBo0000000Gn4h",
        "https://sdodemo-main.force.com/customers/servlet/servlet.Integration?ic=1&lid=01ra0000001VlbP"
    ]
    data_gen.add_formula_column('ReferrerUri__c', referrer)

    def create_date_formula(column_values):
        case_create_date = dateutil.parser.parse(
            column_values['CreatedDate__c'])
        case_close_date = dateutil.parser.parse(column_values['EndTime__c'])
        create_date = fake.date_time_between_dates(case_create_date,
                                                   case_close_date)
        if create_date > reference_datetime:
            create_date = reference_datetime
        return create_date.isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    def start_time_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        start_time = create_date + timedelta(seconds=randint(1, 300))
        return start_time.isoformat(sep=' ')

    data_gen.add_formula_column('StartTime__c', start_time_formula)

    def end_time_formula(column_values):
        create_date = dateutil.parser.parse(column_values['StartTime__c'])
        end_time = create_date + timedelta(seconds=randint(1, 600))
        return end_time.isoformat(sep=' ')

    data_gen.add_formula_column('EndTime__c', end_time_formula)

    data_gen.add_copy_column('RequestTime__c', 'CreatedDate__c')

    data_gen.add_formula_column(
        'Status__c', lambda: choice(['Missed', 'Completed'], p=[.20, .80]))

    data_gen.add_map_column('EndedBy__c', 'Status__c', {
        'Completed': ['Visitor', 'Agent'],
        None: 'Visitor'
    })

    data_gen.add_constant_column('SupervisorTranscriptBody__c', '')

    data_gen.add_constant_column('ScreenResolution__c', '')

    data_gen.add_formula_column('UserAgent__c', formula=fake.user_agent)

    data_gen.add_formula_column('VisitorMessageCount__c',
                                lambda: randint(1, 50))

    data_gen.add_formula_column('WaitTime__c', lambda: randint(1, 120))

    def last_referenced_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        last_referenced_date = create_date + timedelta(seconds=randint(1, 300))
        return last_referenced_date.isoformat(sep=' ')

    data_gen.add_formula_column('LastReferencedDate__c',
                                last_referenced_date_formula)

    data_gen.add_copy_column('LastViewedDate__c', 'LastReferencedDate__c')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    def filter_func(column_values):
        return column_values['Origin'] == 'Chat'

    data_gen.filter(filter_function=filter_func)

    # apply transformations and write file
    data_gen.apply_transformations()

    data_gen.sort_by('StartTime__c')

    output_columns = [
        'External_Id__c', 'Abandoned__c', 'AverageResponseTimeOperator__c',
        'MaxResponseTimeOperator__c', 'OperatorMessageCount__c', 'Body__c',
        'Browser__c', 'BrowserLanguage__c', 'Case.External_Id__c',
        'ChatDuration__c', 'ChatKey__c', 'CreatedDate__c', 'StartTime__c',
        'EndTime__c', 'EndedBy__c', 'LastReferencedDate__c',
        'LastViewedDate__c', 'LiveChatButton.DeveloperName', 'Location__c',
        'Owner.External_Id__c', 'Platform__c', 'ReferrerUri__c',
        'ScreenResolution__c', 'RequestTime__c', 'Status__c',
        'SupervisorTranscriptBody__c', 'UserAgent__c',
        'AverageResponseTimeVisitor__c', 'IpAddress__c',
        'MaxResponseTimeVisitor__c', 'VisitorMessageCount__c', 'WaitTime__c',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
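# Note on add_map_column above (semantics assumed from its usage in these
# examples): the 'Status__c' value selects an entry, a list value means a
# uniform random choice among its items, and the None key is the fallback.
# A rough equivalent for the EndedBy__c mapping:
#
#     def ended_by(status):
#         mapping = {'Completed': ['Visitor', 'Agent'], None: 'Visitor'}
#         value = mapping.get(status, mapping[None])
#         return choice(value) if isinstance(value, list) else value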
def run(batch_id,
        source_file_name,
        output_file_name,
        reference_datetime=today):
    data_gen = DataGenerator()

    # load source file
    account_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'OpportunityCloseDate__c'
    ]
    data_gen.load_source_file(source_file_name, account_columns)

    data_gen.rename_column('External_Id__c', 'Account.External_Id__c')
    data_gen.rename_column('OpportunityCloseDate__c', 'CreatedDate__c')

    # generate a random number of cases per account
    data_gen.duplicate_rows(
        duplication_factor=lambda: int(lognormal(0) + randint(0, 2)))

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Sales_Case.' + str(data_gen.current_row + 1))

    # generate contact
    def contact_formula(column_values):
        return column_values['Account.External_Id__c'].replace(
            'W_Account', 'W_Contact')

    data_gen.add_formula_column('Contact.External_Id__c', contact_formula)

    data_gen.add_formula_column('IsEscalated', case.case_is_escalated)
    data_gen.add_formula_column('CSAT__c', case.case_csat)

    data_gen.add_formula_column('Origin', formula=case.case_origin)
    data_gen.add_formula_column('Type', formula=case.case_type)
    data_gen.add_formula_column('Subject', formula=case.case_subject)
    data_gen.add_formula_column('Priority', formula=case.case_priority)

    data_gen.add_formula_column('Status', formula=case.case_status)

    def create_date_formula(column_values):
        oppty_close_date = dateutil.parser.parse(
            column_values['CreatedDate__c'])
        create_date = oppty_close_date + timedelta(days=randint(0, 90))
        if create_date > reference_datetime:
            create_date = reference_datetime
        return create_date.isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    def close_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        if column_values['Status'] == 'Closed':
            close_date = create_date + timedelta(days=randint(0, 10))
            if close_date > reference_datetime:
                close_date = reference_datetime
            return close_date.isoformat(sep=' ')
        else:
            return ''

    data_gen.add_formula_column('ClosedDate__c', close_date_formula)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()
    data_gen.write(output_file_name)
Example #11
def generate(request):
    data_gen = DataGenerator()
    count = int(request.data['count'])
    type = request.data['type']
    data_gen.row_count = count
    if type == 'name':
        data_gen.add_formula_column('name', formula=fake.name)
    elif type == 'name_male':
        data_gen.add_formula_column('name_male', formula=fake.name_male)
    elif type == 'name_female':
        data_gen.add_formula_column('name_female', formula=fake.name_female)
    elif type == 'first_name':
        data_gen.add_formula_column('first_name', formula=fake.first_name)
    elif type == 'first_name_male':
        data_gen.add_formula_column('first_name_male',
                                    formula=fake.first_name_male)
    elif type == 'first_name_female':
        data_gen.add_formula_column('first_name_female',
                                    formula=fake.first_name_female)
    elif type == 'last_name':
        data_gen.add_formula_column('last_name', formula=fake.last_name)
    elif type == 'last_name_male':
        data_gen.add_formula_column('last_name_male',
                                    formula=fake.last_name_male)
    elif type == 'last_name_female':
        data_gen.add_formula_column('last_name_female',
                                    formula=fake.last_name_female)
    elif type == 'prefix':
        data_gen.add_formula_column('prefix', formula=fake.prefix)
    elif type == 'prefix_male':
        data_gen.add_formula_column('prefix_male', formula=fake.prefix_male)
    elif type == 'prefix_female':
        data_gen.add_formula_column('prefix_female',
                                    formula=fake.prefix_female)
    elif type == 'suffix':
        data_gen.add_formula_column('suffix', formula=fake.suffix)
    elif type == 'suffix_male':
        data_gen.add_formula_column('suffix_male', formula=fake.suffix_male)
    elif type == 'suffix_female':
        data_gen.add_formula_column('suffix_female',
                                    formula=fake.suffix_female)
    elif type == 'phone_number':
        data_gen.add_formula_column('phone_number', formula=fake.phone_number)
    elif type == 'ssn':
        data_gen.add_formula_column('ssn', formula=fake.ssn)
    elif type == 'address':
        data_gen.add_formula_column('address', formula=fake.address)
    elif type == 'building_number':
        data_gen.add_formula_column('building_number',
                                    formula=fake.building_number)
    elif type == 'street_name':
        data_gen.add_formula_column('street_name', formula=fake.street_name)
    elif type == 'street_address':
        data_gen.add_formula_column('street_address',
                                    formula=fake.street_address)
    elif type == 'street_suffix':
        data_gen.add_formula_column('street_suffix',
                                    formula=fake.street_suffix)
    elif type == 'secondary_address':
        data_gen.add_formula_column('secondary_address',
                                    formula=fake.secondary_address)
    elif type == 'city':
        data_gen.add_formula_column('city', formula=fake.city)
    elif type == 'state':
        data_gen.add_formula_column('state', formula=fake.state)
    elif type == 'state_abbr':
        data_gen.add_formula_column('state_abbr', formula=fake.state_abbr)
    elif type == 'zipcode':
        data_gen.add_formula_column('zipcode', formula=fake.zipcode)
    elif type == 'zipcode_plus4':
        data_gen.add_formula_column('zipcode_plus4',
                                    formula=fake.zipcode_plus4)
    elif type == 'country':
        data_gen.add_formula_column('country', formula=fake.country)
    elif type == 'country_code':
        data_gen.add_formula_column('country_code', formula=fake.country_code)
    elif type == 'company':
        data_gen.add_formula_column('company', formula=fake.company)
    elif type == 'job':
        data_gen.add_formula_column('job', formula=fake.job)
    elif type == 'ipv4':
        data_gen.add_formula_column('ipv4', formula=fake.ipv4)
    elif type == 'ipv6':
        data_gen.add_formula_column('ipv6', formula=fake.ipv6)
    elif type == 'url':
        data_gen.add_formula_column('url', formula=fake.url)
    elif type == 'free_email':
        data_gen.add_formula_column('free_email', formula=fake.free_email)
    elif type == 'safe_email':
        data_gen.add_formula_column('safe_email', formula=fake.safe_email)
    elif type == 'company_email':
        data_gen.add_formula_column('company_email',
                                    formula=fake.company_email)
    elif type == 'browser':
        data_gen.add_formula_column('browser', formula=fake.browser)
    elif type == 'md5':
        data_gen.add_formula_column('md5', formula=fake.md5)
    elif type == 'user_agent':
        data_gen.add_formula_column('user_agent', formula=fake.user_agent)
    elif type == 'sentence':
        data_gen.add_formula_column('sentence', formula=fake.sentence)
    else:
        data_gen.add_formula_column('name', formula=fake.name)

    data_gen.apply_transformations()
    flat = [val for sublist in data_gen.rows for val in sublist]
    return Response(flat)
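# A more compact, behavior-equivalent dispatch for the chain above (sketch;
# assumes each supported type matches a same-named provider on this project's
# `fake` object -- some of these, like browser, appear to be custom providers):
def _add_faker_column(data_gen, type):
    supported = {
        'name', 'name_male', 'name_female', 'first_name', 'first_name_male',
        'first_name_female', 'last_name', 'last_name_male', 'last_name_female',
        'prefix', 'prefix_male', 'prefix_female', 'suffix', 'suffix_male',
        'suffix_female', 'phone_number', 'ssn', 'address', 'building_number',
        'street_name', 'street_address', 'street_suffix', 'secondary_address',
        'city', 'state', 'state_abbr', 'zipcode', 'zipcode_plus4', 'country',
        'country_code', 'company', 'job', 'ipv4', 'ipv6', 'url', 'free_email',
        'safe_email', 'company_email', 'browser', 'md5', 'user_agent',
        'sentence'
    }
    name = type if type in supported else 'name'
    data_gen.add_formula_column(name, formula=getattr(fake, name))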
Example #12
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()


    # load source file
    source_columns = ['AccountExternalId__c', 'AccountName__c']
    data_gen.load_source_file(source_file_name, source_columns)


    # rename columns
    data_gen.rename_column('AccountExternalId__c', 'External_Id__c')
    data_gen.rename_column('AccountName__c', 'Name')


    # filter out duplicate data
    data_gen.unique()


    # load shape data as dataset
    shape_columns = [
        'AccountExternalId__c',
        'AccountAnnualRevenue__c',
        'AccountNumberOfEmployees__c',
        'AccountBookings__c',
        'Region__c',
        'Owner.External_Id__c',
        'CloseDate',
        'CreatedDate__c']
    shape_dataset = data_gen.load_dataset('shape', source_file_name, shape_columns)


    # build map of account values
    shape_account_map = shape_dataset.group_by('AccountExternalId__c')


    # helper method to get shape data related to an account
    def get_shape_data(column_values, shape_column_name):
        return shape_account_map.get(column_values['External_Id__c'])[0].get(shape_column_name)


    # generate owner
    def owner_formula(column_values):
        return get_shape_data(column_values, 'Owner.External_Id__c')
    data_gen.add_formula_column('Owner.External_Id__c', owner_formula)


    # update number employees based on shape data
    def employees_formula(column_values):
        employees = get_shape_data(column_values, 'AccountNumberOfEmployees__c')
        return randint(*account.client_size_employees_bands[employees])
    data_gen.add_formula_column('NumberOfEmployees', employees_formula)


    # update annual revenue based on shape data
    def revenue_formula(column_values):
        revenue = get_shape_data(column_values, 'AccountAnnualRevenue__c')
        return 1000 * randint(*account.client_size_rev_bands[revenue])
    data_gen.add_formula_column('AnnualRevenue', revenue_formula)


    # generate account source
    data_gen.add_formula_column('AccountSource', formula=account.account_source)


    # update type based on shape data
    def type_formula(column_values):
        return get_shape_data(column_values, 'AccountAnnualRevenue__c')
    data_gen.add_formula_column('Type', type_formula)


    # generate industry
    data_gen.add_formula_column('Industry', formula=account.account_industry)


    # generate billing street
    data_gen.add_formula_column('BillingStreet', formula=lambda: fake.building_number() + ' ' + fake.street_name())


    # generate billing city
    data_gen.add_formula_column('BillingCity', formula=fake.city)


    # update billing state based on shape data
    def state_formula(column_values):
        region = get_shape_data(column_values, 'Region__c')
        return choice(account.region_state_map[region])
    data_gen.add_formula_column('BillingState', state_formula)


    # generate billing country
    data_gen.add_constant_column('BillingCountry', 'USA')


    # generate year started
    data_gen.add_formula_column('YearStarted', formula=account.account_year_started)


    # generate ownership
    data_gen.add_formula_column('Ownership', formula=account.account_ownership)


    # generate rating
    data_gen.add_formula_column('Rating', formula=account.account_rating)


    # generate earliest created date
    def create_date_formula(column_values):
        opptys = shape_account_map.get(column_values['External_Id__c'])
        create_dates = [dateutil.parser.parse(oppty['CreatedDate__c']) for oppty in opptys]
        create_dates.sort()
        return create_dates[0]
    data_gen.add_formula_column('CreatedDate__c', create_date_formula)


    # generate earliest close date
    def close_date_formula(column_values):
        opptys = shape_account_map.get(column_values['External_Id__c'])
        close_dates = [dateutil.parser.parse(oppty['CloseDate']).date() for oppty in opptys]
        close_dates.sort()
        return close_dates[0]
    data_gen.add_formula_column('OpportunityCloseDate__c', close_date_formula)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()
    data_gen.write(output_file_name)
Example #13
def run(batch_id, source_file_name, output_file_name, reference_date=today_datetime, filter_function=None):

    def get_close_date(values):
        return dateutil.parser.parse(values['CloseDate'])

    def get_create_date(values):
        return dateutil.parser.parse(values['CreatedDate__c'])

    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # add an age column
    data_gen.add_copy_column('Age__c', 'TimeToClose__c')

    # generate a close date
    def close_date_formula(column_values):
        last_day = date(date.today().year, 12, 31)
        offset = column_values['close_date_offset__c']
        # last day of current year - offset
        close_date = last_day - timedelta(days=int(offset))
        return str(close_date)
    data_gen.add_formula_column('CloseDate', close_date_formula)


    # generate a create date
    def create_date_formula(column_values):
        close_date = dateutil.parser.parse(column_values['CloseDate'])
        offset = column_values['TimeToClose__c']
        create_date = close_date - timedelta(days=int(offset))
        return create_date.isoformat(sep=' ')
    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    # generate last activity date
    def last_activity_date_formula(column_values):
        create_date = get_create_date(column_values)
        close_date = get_close_date(column_values)
        if close_date > reference_date:
            close_date = reference_date
        if create_date > reference_date:
            create_date = reference_date
        return fake.date_time_between_dates(create_date, close_date).date()
    data_gen.add_formula_column('LastActivityDate__c', formula=last_activity_date_formula)

    data_gen.apply_transformations()

    if filter_function:
        data_gen.filter(filter_function)

    new_rows = []
    row_count = len(data_gen.rows)
    for i in range(row_count):
        row = data_gen.rows.pop()
        column_values = data_gen.row_to_column_values(row)

        close_day = get_close_date(column_values)
        create_day = get_create_date(column_values)

        # if close date is before reference date keep it exactly as is
        if close_day <= reference_date:
            new_rows.append(row)

        # if create date is before reference date, but the close date is after reference date
        elif (create_day <= reference_date) and (close_day > reference_date):
            # set age
            age = (reference_date - create_day).days
            column_values['Age__c'] = age

            ttc = float(column_values['TimeToClose__c'])
            pct = age / ttc

            # set IsClosed to blank
            column_values['IsClosed'] = ''

            # set IsWon to blank
            column_values['IsWon'] = ''

            # set a stage name
            stage_name_index = int(floor(pct * 4) + choice([-1, 0, 1], p=[.2, .7, .1]))

            # adjust the stage name index
            if stage_name_index < 0:
                stage_name_index = 0
            if stage_name_index > 3:
                stage_name_index = 3

            column_values['StageName'] = definitions.stage_name[stage_name_index]

            column_values['Probability'] = definitions.probabilities[stage_name_index]

            column_values['ForecastCategory'] = definitions.forecast_category[choice([1, 2, 4], p=[.625, .25, .125])]

            column_values['ForecastCategoryName'] = definitions.forecast_category_name[column_values['ForecastCategory']]

            column_values['SalesStageCount__c'] = ceil(pct * float(column_values['SalesStageCount__c']))

            new_rows.append(data_gen.column_values_to_row(column_values))



    data_gen.rows = new_rows
    data_gen.reverse()

    data_gen.write(output_file_name)
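# Worked example of the stage bucketing above (illustrative numbers): with
# age = 30 and TimeToClose__c = 120,
#
#     pct = 30 / 120.0                                   # 0.25
#     index = int(floor(pct * 4) + choice([-1, 0, 1],    # 1 plus jitter
#                                         p=[.2, .7, .1]))
#     index = min(max(index, 0), 3)                      # clamp to 0..3
#
# so this row lands in stage 0, 1, or 2 before the clamp is even needed.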
Example #14
def run(batch_id, source_file_name, output_file_name, reference_date=today_datetime):
    def get_close_date(values):
        return dateutil.parser.parse(values['ClosedDate__c'])


    def get_create_date(values):
        return dateutil.parser.parse(values['CreatedDate__c'])


    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    # calculate dates
    def close_date_formula(column_values):
        last_day = date(date.today().year, 12, 31)
        last_day = datetime.combine(last_day, datetime.min.time())
        offset = column_values['close_date_offset']
        # last day of current year - offset
        close_date = last_day - timedelta(days=int(offset))
        close_date = close_date + timedelta(hours=int(choice([9, 10, 11, 12, 13, 14, 15, 16, 17],
                                                         p=[.12, .13, .13, .07, .09, .13, .13, .11, .09])),
                                            minutes=randint(0, 60),
                                            seconds=randint(0, 60))
        return close_date.isoformat(sep=' ')
    data_gen.add_formula_column('ClosedDate__c', close_date_formula)


    def created_date_formula(column_values):
        time_open = int(column_values['Time_Open__c'])
        date_closed = dateutil.parser.parse(column_values['ClosedDate__c'])
        return (date_closed - timedelta(days=time_open)).isoformat(sep=' ')
    data_gen.add_formula_column('CreatedDate__c', created_date_formula)

    # generate last activity date
    def last_activity_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        close_date = dateutil.parser.parse(column_values['ClosedDate__c'])
        # clamp both dates to the reference date, matching the pattern in the
        # previous example, so the fake activity date never lands in the future
        if close_date > reference_date:
            close_date = reference_date
        if create_date > reference_date:
            create_date = reference_date
        return fake.date_time_between_dates(create_date, close_date).date()
    data_gen.add_formula_column('LastActivityDate__c', formula=last_activity_date_formula)

    data_gen.apply_transformations()

    new_rows = []
    row_count = len(data_gen.rows)
    for i in range(row_count):
        row = data_gen.rows.pop()
        column_values = data_gen.row_to_column_values(row)

        close_day = get_close_date(column_values)
        create_day = get_create_date(column_values)

        # if close date is before reference date keep it exactly as is
        if close_day <= reference_date:
            new_rows.append(row)

        # if create date is before reference date, but the close date is after reference date
        elif (create_day <= reference_date) and (close_day > reference_date):

            column_values['Status'] = choice([
                'New',
                'Working',
                'Waiting on Customer',
                'Response Received',
                'Escalated',
                'Warning',
                'Attention',
                'On Hold',
                'Closed in Community'], p=[
                0.20,
                0.30,
                0.10,
                0.05,
                0.10,
                0.05,
                0.05,
                0.10,
                0.05
            ])

            new_rows.append(data_gen.column_values_to_row(column_values))

    data_gen.rows = new_rows
    data_gen.reverse()

    def milestone_status_formula(column_values):
        status = column_values['Status']
        if status != 'Closed':
            status = 'Open'
        sla = column_values['SLA']
        return status + ' - ' + sla
    data_gen.add_formula_column('MilestoneStatus__c', formula=milestone_status_formula)

    data_gen.add_formula_column('External_Id__c', formula=lambda: 'W_Case.' + str(data_gen.current_row + 1))

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    data_gen.write(output_file_name)
Example #15
def run(batch_id, source_file_name, output_file_name, products_file_name, pricebook_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['External_Id__c', 'Product2Name__c', 'Amount']
    data_gen.load_source_file(source_file_name, source_columns)

    # load datasets
    products = data_gen.load_dataset('products', products_file_name)
    products_by_name = products.group_by('Name')

    pricebook = data_gen.load_dataset('pricebook', pricebook_file_name)
    pricebook_by_product = pricebook.group_by('Product2.External_Id__c')

    # rename columns
    data_gen.rename_column('External_Id__c', 'Opportunity.External_Id__c')
    data_gen.rename_column('Amount', 'TotalPrice')

    data_gen.add_formula_column('External_Id__c', formula=lambda: 'W_OpportunityLineItem.' + str(data_gen.current_row + 1))

    # transform product name to code
    data_gen.add_formula_column('ProductCode', lambda cv: products_by_name[cv['Product2Name__c']][0]['ProductCode'])

    # generate product reference id
    data_gen.add_formula_column('Product2.External_Id__c',
                                lambda cv: products_by_name[cv['Product2Name__c']][0]['External_Id__c'])

    # generate list price
    data_gen.add_formula_column('ListPrice', lambda cv: pricebook_by_product[cv['ProductCode']][0]['UnitPrice'])

    # generate pricebook reference id
    data_gen.add_formula_column('PricebookEntry.External_Id__c',
                                lambda cv: pricebook_by_product[cv['ProductCode']][0]['External_Id__c'])

    # generate quantity
    def quantity_formula(column_values):
        total_price = int(column_values['TotalPrice'])
        list_price = int(column_values['ListPrice'])
        quantity = total_price / list_price
        if quantity <= 0:
            quantity = 1
        return ceil(quantity)
    data_gen.add_formula_column('Quantity', quantity_formula)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c',
        'Opportunity.External_Id__c',
        'TotalPrice',
        'PricebookEntry.External_Id__c',
        'Quantity',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
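# Worked example of quantity_formula above (illustrative numbers):
# TotalPrice = 2500 and ListPrice = 1000 give 2500 / 1000 = 2.5, so
# Quantity = ceil(2.5) = 3; a non-positive ratio is bumped to 1 first.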
Example #16
def run(batch_id,
        source_file_name,
        output_file_name,
        config,
        reference_date=today_datetime,
        filter_function=None):

    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    # generate external id
    col_name = config['externalIdColumnName']
    data_gen.add_formula_column(col_name,
                                formula=lambda: config['externalIdFormat'] +
                                str(data_gen.current_row + 1))

    # iterate through the columns to be mapped
    #  load current foreign file
    #  if replaceSourceColumn is true, replace the 'sourceColumn' by 'replacementColumnName'
    #  retrieve 'foreignRetrieveColumn' where 'foreignMappingColumn' == 'sourceColumn'
    for mapCol in config['mappings']:
        if '.source.' in mapCol['foreignFile']:
            foreign_file = definitions.mfg_source_path + mapCol['foreignFile']
        else:
            foreign_file = definitions.mfg_temporal_path.format(
                today.isoformat()) + mapCol['foreignFile']

        foreignRetrieveColumn = mapCol['foreignRetrieveColumn']
        sourceColumn = mapCol['sourceColumn']

        aux_dataset = data_gen.load_dataset('aux', foreign_file)
        aux_by_id = aux_dataset.group_by('Id')

        def get_aux_data(column_values):
            if column_values[sourceColumn] == '':
                aux_data = ''
            else:
                aux_data = aux_by_id.get(
                    column_values[sourceColumn])[0].get(foreignRetrieveColumn)
            return aux_data

        data_gen.add_formula_column(sourceColumn, formula=get_aux_data)

        data_gen.apply_transformations()

        if mapCol['replaceSourceColumn']:
            data_gen.rename_column(sourceColumn,
                                   mapCol['replacementColumnName'])

    # drop references to the auxiliary data so it can be garbage-collected
    aux_dataset = []
    aux_by_id = []

    # remove auxiliary dataset to free up memory
    if 'aux' in data_gen.datasets:
        data_gen.remove_dataset('aux')

    if 'Status' in data_gen.column_names:
        data_gen.add_constant_column('Status', 'Draft')

    # generate LastProcessedDate
    data_gen.add_constant_column('LastProcessedDate', today.isoformat())

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    if filter_function:
        data_gen.filter(filter_function)

    data_gen.write(output_file_name)

    # Now the creation of the status file begins
    tmp_folder = reference_date.strftime("%Y-%m-%d")
    if 'Contract.csv' in output_file_name:
        generate_status_file(source_file=output_file_name,
                             original_status_file=definitions.source_contract,
                             tmp_folder=tmp_folder,
                             file_name='Contract.status.ALL.csv')
    elif 'Order.csv' in output_file_name:
        generate_status_file(source_file=output_file_name,
                             original_status_file=definitions.source_order,
                             tmp_folder=tmp_folder,
                             file_name='Order.status.ALL.csv')
    elif 'SalesAgreement.csv' in output_file_name:
        generate_status_file(
            source_file=output_file_name,
            original_status_file=definitions.source_sales_agreement,
            tmp_folder=tmp_folder,
            file_name='SalesAgreement.status.ALL.csv')
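# Illustrative shape of the `config` argument consumed above (keys inferred
# from this function; file and column names are placeholders):
_example_config = {
    'externalIdColumnName': 'External_Id__c',
    'externalIdFormat': 'W_Contract.',
    'mappings': [{
        'foreignFile': 'Account.source.csv',
        'foreignRetrieveColumn': 'External_ID__c',
        'sourceColumn': 'AccountId',
        'replaceSourceColumn': True,
        'replacementColumnName': 'Account.External_Id__c',
    }],
}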
def run(batch_id, source_file_name, output_file_name, accounts_file_name, contacts_file_name):
    data_gen = DataGenerator()


    # load source file
    source_columns = [
        'External_Id__c',
        'AccountExternalId__c',
        'Owner.External_Id__c',
        'LeadSource',
        'CloseDate',
        'CreatedDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    # load accounts as dataset
    account_columns = [
        'External_Id__c',
        'Name',
        'BillingState',
        'Industry'
    ]
    account_dataset = data_gen.load_dataset('accounts', accounts_file_name, account_columns)
    accounts_by_id = account_dataset.group_by('External_Id__c')


    # load contacts as dataset
    contact_columns = [
        'External_Id__c',
        'FirstName',
        'LastName'
    ]
    contact_dataset = data_gen.load_dataset('contacts', contacts_file_name, contact_columns)
    contacts_by_id = contact_dataset.group_by('External_Id__c')


    # helper method to get account data
    def get_account_data(column_values, account_column_name):
        return accounts_by_id.get(column_values['ConvertedAccount.External_Id__c'])[0].get(account_column_name)


    # helper method to get contact data
    def get_contact_data(column_values, contact_column_name):
        return contacts_by_id.get(column_values['ConvertedContact.External_Id__c'])[0].get(contact_column_name)


    # rename columns
    data_gen.rename_column('External_Id__c', 'ConvertedOpportunity.External_Id__c')
    data_gen.rename_column('AccountExternalId__c', 'ConvertedAccount.External_Id__c')
    data_gen.rename_column('CloseDate', 'ConvertedDate__c')


    # generate converted lead at a random ratio
    data_gen.duplicate_rows(duplication_factor=lambda: choice([0, 1], p=[.75, .25]))


    # generate id
    data_gen.add_formula_column('External_Id__c', formula=lambda: 'W_Lead.' + str(data_gen.current_row + 1))


    # generate create date
    def create_date_formula(column_values):
        oppty_create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        return oppty_create_date - timedelta(days=randint(0, 45))
    data_gen.add_formula_column('CreatedDate__c', create_date_formula)


    # generate status
    data_gen.add_formula_column('Status', formula=lead.lead_status)


    # generate status
    data_gen.add_map_column('IsConverted', 'Status', {
        'Qualified - Convert': 'true',
        None: 'false'
    })


    # generate opportunity
    data_gen.add_map_column('ConvertedOpportunity.External_Id__c', 'Status', {
        'Qualified - Convert': lambda cv: cv['ConvertedOpportunity.External_Id__c'],
        None: ''
    })


    # generate account
    data_gen.add_map_column('ConvertedAccount.External_Id__c', 'Status', {
        'Qualified - Convert': lambda cv: cv['ConvertedAccount.External_Id__c'],
        None: ''
    })


    # generate contact
    data_gen.add_map_column('ConvertedContact.External_Id__c', 'Status', {
        'Qualified - Convert': lambda cv: cv['ConvertedAccount.External_Id__c'].replace('W_Account', 'W_Contact'),
        None: ''
    })


    # generate converted date
    data_gen.add_map_column('ConvertedDate__c', 'Status', {
        'Qualified - Convert': lambda cv: cv['ConvertedDate__c'],
        None: ''
    })


    # generate name
    data_gen.add_map_column('FirstName', 'Status', {
        'Qualified - Convert': lambda cv: get_contact_data(cv, 'FirstName'),
        None: lambda: fake.first_name()
    })


    data_gen.add_map_column('LastName', 'Status', {
        'Qualified - Convert': lambda cv: get_contact_data(cv, 'LastName'),
        None: lambda: fake.last_name()
    })


    # generate company
    data_gen.add_map_column('Company', 'Status', {
        'Qualified - Convert': lambda cv: get_account_data(cv, 'Name'),
        None: 'Not Applicable'
    })


    # generate industry
    data_gen.add_map_column('Industry', 'Status', {
        'Qualified - Convert': lambda cv: get_account_data(cv, 'Industry'),
        None: ''
    })


    # generate state
    data_gen.add_map_column('State', 'Status', {
        'Qualified - Convert': lambda cv: get_account_data(cv, 'BillingState'),
        None: ''
    })


    # generate is unread by owner
    data_gen.add_map_column('IsUnreadByOwner', 'Status', {
        'Qualified - Convert': 'false',
        None: lead.lead_is_unread_by_owner
    })

    # generate rating
    data_gen.add_formula_column('Rating', formula=lead.lead_rating)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()
    data_gen.write(output_file_name)
def run(batch_id,
        source_file_name,
        output_file_name,
        reference_date=today_datetime):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name, ['External_Id__c'])

    data_gen.rename_column('External_Id__c', 'User.External_Id__c')

    data_gen.add_copy_column('Owner.External_Id__c', 'User.External_Id__c')

    data_gen.duplicate_rows(duplication_factor=lambda: int(normal(60, 10)))

    data_gen.add_formula_column(
        'External_Id__c',
        lambda: 'W_UserServicePresence.' + str(data_gen.current_row + 1))

    data_gen.add_formula_column('AtCapacityDuration__c',
                                lambda: randint(30, 900))

    data_gen.add_formula_column('AverageCapacity__c', lambda: randint(30, 500))

    data_gen.add_formula_column('ConfiguredCapacity__c',
                                lambda: randint(30, 600))

    start_date = reference_date - timedelta(days=365)
    end_date = reference_date

    data_gen.add_formula_column(
        'CreatedDate__c', lambda: fake.date_time_between_dates(
            start_date, end_date).isoformat(sep=' '))

    data_gen.add_formula_column('IdleDuration__c', lambda: randint(30, 600))

    data_gen.add_formula_column('IsCurrentState__c',
                                lambda: choice(['true', 'false']))

    data_gen.add_formula_column('IsAway__c', lambda: choice(['true', 'false']))

    data_gen.add_formula_column('StatusDuration__c', lambda: randint(30, 900))

    data_gen.add_copy_column('StatusStartDate__c', 'CreatedDate__c')

    def status_end_date_formula(column_values):
        start_date = dateutil.parser.parse(column_values['StatusStartDate__c'])
        status_duration = int(column_values['StatusDuration__c'])
        return (start_date +
                timedelta(seconds=status_duration)).isoformat(sep=' ')

    data_gen.add_formula_column('StatusEndDate__c',
                                formula=status_end_date_formula)

    data_gen.add_formula_column('ServicePresenceStatus.DeveloperName', [
        'Busy', 'Online', 'Available_Live_Agent', 'Busy_Break', 'Busy_Lunch',
        'Busy_Training', 'Available_LiveMessage'
    ])

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c', 'User.External_Id__c', 'Owner.External_Id__c',
        'AtCapacityDuration__c', 'AverageCapacity__c', 'ConfiguredCapacity__c',
        'CreatedDate__c', 'IdleDuration__c', 'IsAway__c', 'IsCurrentState__c',
        'StatusDuration__c', 'StatusStartDate__c', 'StatusEndDate__c',
        'ServicePresenceStatus.DeveloperName', 'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def run(batch_id,
        source_file_name,
        output_file_name,
        source_accounts,
        source_service_resources,
        source_service_territories,
        source_work_orders,
        reference_datetime=today):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    data_gen.add_formula_column(
        'EarliestStartTime',
        lambda cv: dateutil.parser.parse(cv['EarliestStartTime']))

    data_gen.apply_transformations()

    data_gen.sort_by('EarliestStartTime', reverse=True)

    # shift dates so the latest start time lands just before the reference date
    delta = reference_datetime.date() - data_gen.row_to_column_values(
        data_gen.rows[0])['EarliestStartTime'].date()
    data_gen.add_formula_column(
        'EarliestStartTime', lambda cv: (cv['EarliestStartTime'] + timedelta(
            days=delta.days - 1)).replace(tzinfo=None))

    data_gen.add_formula_column(
        'ActualStartTime', lambda cv: "" if cv['ActualStartTime'] == "" else
        (dateutil.parser.parse(cv['ActualStartTime']) + timedelta(
            days=delta.days - 1)).replace(tzinfo=None))

    data_gen.add_formula_column(
        'ActualEndTime', lambda cv: "" if cv['ActualEndTime'] == "" else
        (dateutil.parser.parse(cv['ActualEndTime']) + timedelta(
            days=delta.days - 1)).replace(tzinfo=None))

    data_gen.add_formula_column(
        'ArrivalWindowStartTime', lambda cv: ""
        if cv['ArrivalWindowStartTime'] == "" else
        (dateutil.parser.parse(cv['ArrivalWindowStartTime']) + timedelta(
            days=delta.days - 1)).replace(tzinfo=None))

    data_gen.add_formula_column(
        'ArrivalWindowEndTime', lambda cv: ""
        if cv['ArrivalWindowEndTime'] == "" else
        (dateutil.parser.parse(cv['ArrivalWindowEndTime']) + timedelta(
            days=delta.days - 1)).replace(tzinfo=None))

    data_gen.add_formula_column(
        'DueDate', lambda cv: ""
        if cv['DueDate'] == "" else (dateutil.parser.parse(cv[
            'DueDate']) + timedelta(days=delta.days - 1)).replace(tzinfo=None))

    data_gen.apply_transformations()

    data_gen.add_copy_column('CreatedDate__c', 'EarliestStartTime')

    accounts = data_gen.load_dataset("Accounts", source_accounts,
                                     ['Id', 'External_ID__c']).dict(
                                         'Id', 'External_ID__c')

    data_gen.add_map_column('Account.External_Id__c', 'AccountId', accounts)

    service_resources = data_gen.load_dataset("ServiceResources",
                                              source_service_resources,
                                              ['Id', 'External_ID__c']).dict(
                                                  'Id', 'External_ID__c')

    data_gen.add_map_column('ServiceResource.External_Id__c',
                            'FSLDemoTools_Service_Resource__c',
                            service_resources)

    service_territories = data_gen.load_dataset("ServiceTerritories",
                                                source_service_territories,
                                                ['Id', 'External_ID__c']).dict(
                                                    'Id', 'External_ID__c')

    data_gen.add_map_column('ServiceTerritory.External_Id__c',
                            'ServiceTerritoryId', service_territories)

    work_orders = data_gen.load_dataset("WorkOrders", source_work_orders,
                                        ['Id', 'External_ID__c']).dict(
                                            'Id', 'External_ID__c')

    data_gen.add_map_column('WorkOrder.External_Id__c', 'ParentRecordId',
                            work_orders)

    data_gen.apply_transformations()

    data_gen.filter(
        lambda cv: cv['WorkOrder.External_Id__c'].startswith('WO.'))

    data_gen.apply_transformations()

    data_gen.write(
        output_file_name,
        columns=[
            'External_ID__c', 'CreatedDate__c',
            'ServiceResource.External_Id__c',
            'ServiceTerritory.External_Id__c', 'WorkOrder.External_Id__c',
            'ActualStartTime', 'ArrivalWindowStartTime', 'ActualDuration',
            'EarliestStartTime', 'Duration', 'DurationType', 'Status',
            'DueDate', 'ActualEndTime', 'ArrivalWindowEndTime'
        ])
    return delta
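
# --- illustrative sketch (added; not part of the original example) ---
# A minimal, standalone version of the date-shifting math above: delta is
# measured from the newest EarliestStartTime to the reference date, and every
# timestamp is then shifted by (delta.days - 1) so the newest appointment
# lands the day before the reference date. Sample values are hypothetical.
from datetime import datetime, timedelta

reference = datetime(2021, 6, 15)
newest_start = datetime(2021, 1, 10, 9, 30)

delta = reference.date() - newest_start.date()
shifted = newest_start + timedelta(days=delta.days - 1)
assert shifted.date() == reference.date() - timedelta(days=1)
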
Example #20
def run(batch_id, source_file_name, output_file_name, reference_datetime=today):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c',
        'Owner.External_Id__c',
        'CreatedDate__c',
        'LastActivityDate__c',
        'Team__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    data_gen.rename_column('LastActivityDate__c', 'ActivityDate')
    data_gen.rename_column('Team__c', 'CallObject')


    # generate a random number of tasks per case
    data_gen.duplicate_rows(duplication_factor=lambda: randint(0, 3))


    data_gen.add_formula_column('External_Id__c', formula=lambda: 'W_Services_Task.' + str(data_gen.current_row + 1))


    data_gen.add_formula_column('TaskSubtype', formula=task.task_subtype)
    data_gen.add_formula_column('CallDurationInSeconds', formula=task.task_call_duration)
    data_gen.add_formula_column('CallDisposition', formula=task.task_call_disposition)
    data_gen.add_formula_column('CallType', formula=task.task_call_type)


    data_gen.add_formula_column('Status', formula=task.task_status)

    data_gen.add_formula_column('Priority', formula=task.task_priority)

    def create_date_formula(column_values):
        case_create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        case_close_date = datetime.combine(dateutil.parser.parse(column_values['ActivityDate']), case_create_date.time())
        create_date = fake.date_time_between_dates(case_create_date, case_close_date)
        if create_date > reference_datetime:
            create_date = reference_datetime
        return create_date.isoformat(sep=' ')
    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    data_gen.add_copy_column('LastModifiedDate__c', 'CreatedDate__c')

    def activity_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c']).date()
        return (create_date + timedelta(days=randint(0, 14))).isoformat()
    data_gen.add_formula_column('ActivityDate', activity_date_formula)


    data_gen.add_formula_column('Subject', formula=task.task_subject_simple)

    data_gen.add_map_column('Type', 'Subject', value_map={
        'Call': lambda: choice(['Call', 'Meeting'], p=[.70, .30]),
        'Send Letter': 'Email',
        'Send Quote': 'Email',
        None: lambda: choice(['Meeting', 'Prep', 'Other'], p=[.50, .25, .25])
    })

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c',
        'Owner.External_Id__c',
        'Case.External_Id__c',
        'CreatedDate__c',
        'LastModifiedDate__c',
        'ActivityDate',
        'Subject',
        'Type',
        'TaskSubtype',
        'CallDurationInSeconds',
        'CallDisposition',
        'CallType',
        'CallObject',
        'Status',
        'Priority',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
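
# --- illustrative sketch (added; not part of the original example) ---
# The value_map used for 'Type' above mixes plain constants with callables
# and uses None as the fallback key. A minimal stand-in for how such a map
# might be resolved per row; the resolve() helper is hypothetical, not part
# of DataGenerator, and the fallback here is simplified to a constant.
from random import random

def resolve(value_map, key):
    value = value_map.get(key, value_map.get(None))
    return value() if callable(value) else value

type_map = {
    'Call': lambda: 'Call' if random() < 0.70 else 'Meeting',
    'Send Letter': 'Email',
    'Send Quote': 'Email',
    None: 'Other',
}
print(resolve(type_map, 'Call'))        # 'Call' about 70% of the time
print(resolve(type_map, 'Follow-up'))   # no match, falls back to 'Other'
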
def run(source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    rename_map = {
        'Supplies Group': 'Product2Family__c',
        'Region': 'Region__c',
        'Route To Market': 'LeadSource',
        'Elapsed Days In Sales Stage': 'TimeToClose__c',
        'Sales Stage Change Count': 'SalesStageCount__c',
        'Opportunity Amount USD': 'Amount',
        'Deal Size Category': 'DealSizeCategory__c'
    }
    data_gen.rename_columns(rename_map)

    # multiply time to close by 2
    data_gen.add_formula_column('TimeToClose__c',
                                lambda cv: int(cv['TimeToClose__c']) * 2)

    # map existing columns to new columns
    data_gen.add_map_column('Competitor__c', 'Competitor Type',
                            definitions.competitor_type)
    data_gen.add_map_column('Product2Name__c', 'Supplies Subgroup',
                            definitions.supplies_subgroup_map)
    data_gen.add_map_column('AccountAnnualRevenue__c',
                            'Client Size By Revenue',
                            definitions.client_size_rev)
    data_gen.add_map_column('AccountNumberOfEmployees__c',
                            'Client Size By Employee Count',
                            definitions.client_size_employees)
    data_gen.add_map_column('AccountBookings__c',
                            'Revenue From Client Past Two Years',
                            definitions.client_past_revenue)
    data_gen.add_map_column('IsWon', 'Opportunity Result', definitions.isWon)

    # generate external id
    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Opportunity.' + str(data_gen.current_row + 1))

    data_gen.add_formula_column(
        'Exec_Meeting__c', lambda: choice(['true', 'false'], p=[.35, .65]))

    data_gen.add_formula_column(
        'Interactive_Demo__c', lambda: choice(['true', 'false'], p=[.30, .70]))

    def ttc_formula(column_values):
        ttc = int(column_values['TimeToClose__c'])
        exec_meeting = column_values['Exec_Meeting__c']
        competitor_type = column_values['Competitor Type']
        demo = column_values['Interactive_Demo__c']
        rev = column_values['AccountAnnualRevenue__c']

        if ttc == 0:
            return 0

        if exec_meeting == 'true':
            if competitor_type == 'None':
                ttc = ttc + 4
            else:
                ttc = ttc - 6
        if demo == 'true':
            if rev == 'T100':
                ttc = ttc + 6
            else:
                ttc = ttc - 5

        if ttc < 0:
            return 0

        return ttc

    data_gen.add_formula_column('TimeToClose__c', formula=ttc_formula)

    data_gen.add_constant_column('IsClosed', 'true')

    data_gen.add_formula_column(
        'RecordType.DeveloperName',
        formula=lambda: choice(['SimpleOpportunity', 'ChannelPartner'],
                               p=[.70, .30]))

    # generate opportunity type
    types = [
        'Add-On Business', 'Existing Business', 'New Business',
        'New Business / Add-on'
    ]
    data_gen.add_formula_column(
        'Type', formula=lambda: choice(types, p=[0.1, 0.3, 0.5, 0.1]))

    # generate a close date year and quarter
    data_gen.add_formula_column('close_date_year',
                                formula=lambda: choice(list(range(0, 30))))
    data_gen.add_formula_column(
        'close_date_quarter',
        formula=lambda: choice([1, 2, 3, 4], p=[0.21, 0.24, 0.22, 0.33]))

    # generate a close date offset from the year and quarter
    def offset_formula(column_values):
        day = int(round(chisquare(9) * 5))
        offset = 365 * (column_values['close_date_year']) + 91 * (
            column_values['close_date_quarter'] - 1) + day
        return offset

    data_gen.add_formula_column('close_date_offset__c', offset_formula)

    # generate a close date
    def close_date_formula(column_values):
        last_day = date(date.today().year, 12, 31)
        offset = column_values['close_date_offset__c']
        # last day of current year - offset
        close_date = last_day - timedelta(days=int(offset))
        return str(close_date)

    data_gen.add_formula_column('CloseDate', close_date_formula)

    # generate a create date
    def create_date_formula(column_values):
        close_date = dateutil.parser.parse(column_values['CloseDate'])
        offset = column_values['TimeToClose__c']
        create_date = close_date - timedelta(days=int(offset))
        return create_date.isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    # generate last activity date
    def last_activity_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        close_date = dateutil.parser.parse(column_values['CloseDate'])
        if close_date > today_datetime:
            close_date = today_datetime
        if create_date > today_datetime:
            create_date = today_datetime
        return fake.date_time_between_dates(create_date, close_date).date()

    data_gen.add_formula_column('LastActivityDate__c',
                                formula=last_activity_date_formula)

    # generate StageName, ForecastCategory, and Probability
    data_gen.add_map_column('StageName',
                            'Opportunity Result',
                            value_map={
                                'Won': 'Closed Won',
                                None: 'Closed Lost'
                            })
    data_gen.add_map_column('ForecastCategory',
                            'Opportunity Result',
                            value_map={
                                'Won': 'Closed',
                                None: 'Omitted'
                            })
    data_gen.add_map_column('ForecastCategoryName',
                            'Opportunity Result',
                            value_map={
                                'Won': 'Closed',
                                None: 'Omitted'
                            })
    data_gen.add_map_column('Probability',
                            'Opportunity Result',
                            value_map={
                                'Won': '100',
                                None: '0'
                            })

    # randomly pick an owner from the same region
    region_territory_map = {
        'Pacific':
        lambda: 'W_Sales_User.' + str(choice([1, 2, 3, 4, 5, 6])),
        "Northwest":
        lambda: 'W_Sales_User.' + str(choice([1, 2, 3, 4, 5, 6])),
        "Midwest":
        lambda: 'W_Sales_User.' + str(choice([7, 8, 9, 10, 11])),
        "Southwest":
        lambda: 'W_Sales_User.' + str(choice([7, 8, 9, 10, 11])),
        "Mid-Atlantic":
        lambda: 'W_Sales_User.' + str(choice([7, 8, 9, 10, 11])),
        "Northeast":
        lambda: 'W_Sales_User.' + str(choice([12, 13, 14, 15, 16, 17])),
        "Southeast":
        lambda: 'W_Sales_User.' + str(choice([12, 13, 14, 15, 16, 17]))
    }
    data_gen.add_map_column('Owner.External_Id__c', 'Region__c',
                            region_territory_map)

    # build out helper column for account selection
    def account_cat_formula(column_values):
        x1 = column_values['Client Size By Revenue']
        x2 = column_values['Client Size By Employee Count']
        x3 = column_values['Revenue From Client Past Two Years']
        return str(x1) + '.' + str(x2) + '.' + str(x3)

    data_gen.add_formula_column('account_cat', account_cat_formula)

    # apply pending transformations now so we can sort by account_cat
    data_gen.apply_transformations()

    data_gen.sort_by('account_cat')

    # helper dataset used for account selection
    data_gen.add_dataset('account_segment', {
        'account_id': 0,
        'account_count': 0,
        'current_account_cat': None
    })

    # generate a distribution of account ids
    def account_id_formula(column_values):
        account_segment = data_gen.datasets['account_segment']
        account_id = account_segment['account_id']
        account_count = account_segment['account_count']
        current_account_cat = account_segment['current_account_cat']

        if column_values[
                'account_cat'] == current_account_cat and account_count > 0:
            # continue with the current account_id if there are still any to take
            # but first decrement account count
            account_count += -1
            account_segment['account_count'] = account_count

            return account_id
        else:
            # use new account id
            account_id += 1
            # generate a random number of opportunities to associate with an account
            account_count = int(round(lognormal(1))) + randint(1, 7)
            current_account_cat = column_values['account_cat']

            # update account segment dataset for next iteration
            account_count += -1
            account_segment['account_id'] = account_id
            account_segment['account_count'] = account_count
            account_segment['current_account_cat'] = current_account_cat

            return account_id

    data_gen.add_formula_column('AccountId__c', account_id_formula)

    # generate account id string
    data_gen.add_formula_column(
        'AccountExternalId__c',
        formula=lambda cv: 'W_Account.' + str(cv['AccountId__c']))

    # generate account name string
    account_names = {}

    def account_name_formula(column_values):
        account_id = column_values['AccountId__c']
        if account_id in account_names:
            return account_names[account_id]
        else:
            account_name = account.account_name()
            account_names[account_id] = account_name
            return account_name

    data_gen.add_formula_column('AccountName__c', formula=account_name_formula)

    # generate name
    def name_formula(column_values):
        account_name = column_values['AccountName__c']
        return account_name + ' ' + str(data_gen.current_row % 256)

    data_gen.add_formula_column('Name', name_formula)

    # apply remaining transformations
    data_gen.apply_transformations()

    # sort by account id
    data_gen.sort_by('AccountId__c')

    columns_to_write = [
        'External_Id__c', 'Product2Name__c', 'Product2Family__c', 'Region__c',
        'LeadSource', 'TimeToClose__c', 'SalesStageCount__c', 'Amount',
        'AccountAnnualRevenue__c', 'AccountNumberOfEmployees__c',
        'AccountBookings__c', 'Competitor__c', 'DealSizeCategory__c',
        'AccountExternalId__c', 'AccountName__c', 'close_date_year',
        'close_date_quarter', 'close_date_offset__c', 'Exec_Meeting__c',
        'Interactive_Demo__c', 'IsWon', 'IsClosed', 'Owner.External_Id__c',
        'Name', 'Type', 'StageName', 'ForecastCategory',
        'ForecastCategoryName', 'Probability', 'RecordType.DeveloperName'
    ]

    data_gen.write(output_file_name, columns_to_write)
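
# --- illustrative sketch (added; not part of the original example) ---
# A standalone version of the account_id assignment above: rows sorted by
# category are consumed in runs of int(round(lognormal(1))) + randint(1, 7)
# rows per account, so a few accounts collect many opportunities (a
# long-tail shape). The categories below are hypothetical sample data,
# using the numpy.random helpers these examples appear to rely on.
from numpy.random import lognormal, randint

cats = sorted(['A'] * 6 + ['B'] * 4)
account_id, remaining, current_cat = 0, 0, None
assignments = []
for cat in cats:
    if cat != current_cat or remaining == 0:
        # start a new account with a fresh run length
        account_id += 1
        remaining = int(round(lognormal(1))) + randint(1, 7)
        current_cat = cat
    remaining -= 1
    assignments.append((cat, 'W_Account.' + str(account_id)))
print(assignments)
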
Example #22
def run(batch_id, source_file_name, product_output_file_name,
        pricebook_output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['Product2Name__c', 'Product2Family__c']
    data_gen.load_source_file(source_file_name, source_columns)

    # rename columns
    data_gen.rename_column('Product2Name__c', 'Name')
    data_gen.rename_column('Product2Family__c', 'Family')

    # filter out duplicate data
    data_gen.unique()

    # generate product code
    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Product.' + str(data_gen.current_row + 1))

    data_gen.add_copy_column('ProductCode', 'External_Id__c')

    # apply transformations and write Product2 file
    data_gen.apply_transformations()
    data_gen.write(product_output_file_name)

    # generate pricebook entry code
    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_PricebookEntry.' + str(data_gen.current_row + 1))

    # generate product id reference
    data_gen.add_copy_column('Product2.External_Id__c', 'ProductCode')

    # get map of product names to opportunity amounts
    shape_dataset = data_gen.load_dataset('shape', source_file_name,
                                          ['Product2Name__c', 'Amount'])
    amounts_by_product_name = shape_dataset.group_by('Product2Name__c')

    # generate unit price
    def unit_price_formula(column_values):
        # find average opportunity amount for product
        product_name = column_values['Name']
        amounts = amounts_by_product_name[product_name]
        avg_amount = 0
        count = 0
        for amount in amounts:
            amount = int(amount['Amount'])
            if amount > 0:
                count += 1
                avg_amount += amount
        avg_amount = avg_amount / count
        random_quantity = randint(1, 100)
        return int(avg_amount / random_quantity)

    data_gen.add_formula_column('UnitPrice', formula=unit_price_formula)

    data_gen.add_constant_column('IsActive', 'true')
    data_gen.add_constant_column('Pricebook2.Name', 'Standard Price Book')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write PricebookEntry file
    data_gen.apply_transformations()

    data_gen.write(pricebook_output_file_name, [
        'External_Id__c', 'Product2.External_Id__c', 'IsActive',
        'Pricebook2.Name', 'UnitPrice', 'analyticsdemo_batch_id__c'
    ])
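
# --- illustrative sketch (added; not part of the original example) ---
# UnitPrice above is the average of the positive opportunity amounts seen
# for a product, divided by a random quantity. A standalone version with
# hypothetical amounts:
from numpy.random import randint

amounts = [12000, 0, 8000, 4000]            # zero-amount rows are skipped
positive = [a for a in amounts if a > 0]
avg_amount = sum(positive) / len(positive)  # 8000.0
unit_price = int(avg_amount / randint(1, 100))  # random quantity, as in the formula above
print(unit_price)
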
def run(batch_id, source_file_name, output_file_name, manager_output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['Owner.External_Id__c', 'Team__c']
    data_gen.load_source_file(source_file_name, source_columns)
    data_gen.unique()

    # rename columns
    data_gen.rename_column('Owner.External_Id__c', 'External_Id__c')
    data_gen.rename_column('Team__c', 'UserRole.Name')

    # add 3 manager users
    west_manager = ['W_User.M.' + str(len(data_gen.rows) + 1), 'West CSM']
    east_manager = ['W_User.M.' + str(len(data_gen.rows) + 2), 'East CSM']
    central_manager = ['W_User.M.' + str(len(data_gen.rows) + 3), 'Central CSM']
    ## managers from Sales ##
    # west_manager = ['RVP West', 'W_Sales_User.M.' + str(len(data_gen.rows) + 1)]
    # east_manager = ['RVP East', 'W_Sales_User.M.' + str(len(data_gen.rows) + 2)]
    # central_manager = ['RVP Central', 'W_Sales_User.M.' + str(len(data_gen.rows) + 3)]
    ########################

    data_gen.rows.append(west_manager)
    data_gen.rows.append(east_manager)
    data_gen.rows.append(central_manager)

    # generate company name
    data_gen.add_formula_column('CompanyName', formula=fake.company)

    # generate fake first and last name
    def first_name_formula(column_values):
        user_id = int(column_values['External_Id__c'].split('.')[-1])
        return fake.first_name_female() if user_id < 13 else fake.first_name_male()
    data_gen.add_formula_column('FirstName', formula=first_name_formula)
    data_gen.add_formula_column('LastName', formula=fake.last_name)

    # generate data based on fake first and last name
    data_gen.add_formula_column('Name', lambda cv: cv['FirstName'] + ' ' + cv['LastName'])

    # generate data based on fake first and last name
    def alias_formula(column_values):
        alias = (column_values['FirstName'][0] + column_values['LastName']).lower()
        trimmed_alias = alias[:8] if len(alias) > 8 else alias
        return trimmed_alias
    data_gen.add_formula_column('Alias', formula=alias_formula)
    data_gen.add_formula_column('Username', lambda cv: cv['Alias'] + '@demo.user')
    data_gen.add_formula_column('CommunityNickname', lambda cv: cv['Alias'] + str(randint(100, 999)))
    data_gen.add_formula_column('Email', lambda cv: cv['Alias'] + '@webmail.com')

    data_gen.add_formula_column('Phone', formula=fake.phone_number)

    titles = ['Customer Service Representative', 'Senior Customer Service Representative']
    data_gen.add_formula_column('Title', lambda: choice(titles, p=[.70, .30]))

    # generate constant values
    data_gen.add_constant_column('IsActive', 'false')
    data_gen.add_constant_column('TimeZoneSidKey', 'America/Los_Angeles')
    data_gen.add_constant_column('Profile.Name', 'Standard User')
    # from oppty> data_gen.add_constant_column('Profile.Name', 'Standard User')
    data_gen.add_constant_column('LocaleSidKey', 'en_US')
    data_gen.add_constant_column('LanguageLocaleKey', 'en_US')
    data_gen.add_constant_column('EmailEncodingKey', 'ISO-8859-1')
    data_gen.add_constant_column('ForecastEnabled', 'true') # this comes from Sales

    data_gen.add_constant_column('UserPermissionsAvantgoUser', 'false')
    data_gen.add_constant_column('UserPermissionsCallCenterAutoLogin', 'false')
    data_gen.add_constant_column('UserPermissionsChatterAnswersUser', 'false')
    data_gen.add_constant_column('UserPermissionsInteractionUser', 'false')
    data_gen.add_constant_column('UserPermissionsJigsawProspectingUser', 'false')
    data_gen.add_constant_column('UserPermissionsKnowledgeUser', 'false')
    data_gen.add_constant_column('UserPermissionsLiveAgentUser', 'false')
    data_gen.add_constant_column('UserPermissionsMarketingUser', 'false')
    data_gen.add_constant_column('UserPermissionsMobileUser', 'false')
    data_gen.add_constant_column('UserPermissionsOfflineUser', 'false')
    data_gen.add_constant_column('UserPermissionsSFContentUser', 'false')
    data_gen.add_constant_column('UserPermissionsSiteforceContributorUser', 'false')
    data_gen.add_constant_column('UserPermissionsSiteforcePublisherUser', 'false')
    data_gen.add_constant_column('UserPermissionsSupportUser', 'false')
    data_gen.add_constant_column('UserPermissionsWorkDotComUserFeature', 'false')
    data_gen.add_constant_column('UserPreferencesActivityRemindersPopup', 'false')
    data_gen.add_constant_column('UserPreferencesApexPagesDeveloperMode', 'false')
    data_gen.add_constant_column('UserPreferencesCacheDiagnostics', 'false')
    data_gen.add_constant_column('UserPreferencesContentEmailAsAndWhen', 'false')
    data_gen.add_constant_column('UserPreferencesContentNoEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableAllFeedsEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableBookmarkEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableChangeCommentEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableEndorsementEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableFeedbackEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableFileShareNotificationsForApi', 'false')
    data_gen.add_constant_column('UserPreferencesDisableFollowersEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableLaterCommentEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableLikeEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableMentionsPostEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableMessageEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableProfilePostEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableRewardEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableSharePostEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisableWorkEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisCommentAfterLikeEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisMentionsCommentEmail', 'false')
    data_gen.add_constant_column('UserPreferencesDisProfPostCommentEmail', 'false')
    data_gen.add_constant_column('UserPreferencesEnableAutoSubForFeeds', 'false')
    data_gen.add_constant_column('UserPreferencesEventRemindersCheckboxDefault', 'false')
    data_gen.add_constant_column('UserPreferencesHideBiggerPhotoCallout', 'false')
    data_gen.add_constant_column('UserPreferencesHideChatterOnboardingSplash', 'false')
    data_gen.add_constant_column('UserPreferencesHideCSNDesktopTask', 'false')
    data_gen.add_constant_column('UserPreferencesHideCSNGetChatterMobileTask', 'false')
    data_gen.add_constant_column('UserPreferencesHideEndUserOnboardingAssistantModal', 'false')
    data_gen.add_constant_column('UserPreferencesHideLightningMigrationModal', 'false')
    data_gen.add_constant_column('UserPreferencesHideS1BrowserUI', 'false')
    data_gen.add_constant_column('UserPreferencesHideSecondChatterOnboardingSplash', 'false')
    data_gen.add_constant_column('UserPreferencesHideSfxWelcomeMat', 'false')
    data_gen.add_constant_column('UserPreferencesJigsawListUser', 'false')
    data_gen.add_constant_column('UserPreferencesLightningExperiencePreferred', 'false')
    data_gen.add_constant_column('UserPreferencesPathAssistantCollapsed', 'false')
    data_gen.add_constant_column('UserPreferencesPreviewLightning', 'false')
    data_gen.add_constant_column('UserPreferencesReminderSoundOff', 'false')
    data_gen.add_constant_column('UserPreferencesShowCityToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowCityToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowCountryToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowCountryToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowEmailToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowEmailToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowFaxToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowFaxToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowManagerToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowManagerToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowMobilePhoneToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowMobilePhoneToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowPostalCodeToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowPostalCodeToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowProfilePicToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowStateToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowStateToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowStreetAddressToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowStreetAddressToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowTitleToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowTitleToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowWorkPhoneToExternalUsers', 'false')
    data_gen.add_constant_column('UserPreferencesShowWorkPhoneToGuestUsers', 'false')
    data_gen.add_constant_column('UserPreferencesSortFeedByComment', 'false')
    data_gen.add_constant_column('UserPreferencesTaskRemindersCheckboxDefault', 'false')
    data_gen.add_constant_column('EmailPreferencesAutoBcc', 'false')
    data_gen.add_constant_column('EmailPreferencesAutoBccStayInTouch', 'false')
    data_gen.add_constant_column('EmailPreferencesStayInTouchReminder', 'false')
    data_gen.add_constant_column('UserPreferencesGlobalNavBarWTShown', 'false')
    data_gen.add_constant_column('UserPreferencesGlobalNavGridMenuWTShown', 'false')
    data_gen.add_constant_column('UserPreferencesCreateLEXAppsWTShown', 'false')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()
    data_gen.write(output_file_name)

    # create manager file
    data_gen.filter(lambda cv: 'CSM' not in cv['UserRole.Name'])

    manager_map = {
        'West CSR': west_manager[0],
        'East CSR': east_manager[0],
        'Central CSR': central_manager[0]
    }
    ### this is the manager file section in Sales> ###
    # # create manager file
    # data_gen.filter(lambda cv: 'RVP' not in cv['UserRole.Name'])
    # manager_map = {
    #     'West Sales': west_manager[1],
    #     'East Sales': east_manager[1],
    #     'Central Sales': central_manager[1],
    # }
    ##################################################
    data_gen.add_map_column('Manager.External_Id__c', 'UserRole.Name', manager_map)

    data_gen.apply_transformations()
    data_gen.write(manager_output_file_name, ['External_Id__c', 'Manager.External_Id__c'])
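
# --- illustrative sketch (added; not part of the original example) ---
# The user fields above are all derived from a single alias: first initial
# plus last name, lowercased and capped at 8 characters. A standalone
# version with a hypothetical name:
from random import randint

first_name, last_name = 'Patricia', 'Lancaster'
alias = (first_name[0] + last_name).lower()[:8]   # 'plancast'
username = alias + '@demo.user'
nickname = alias + str(randint(100, 999))
email = alias + '@webmail.com'
print(username, nickname, email)
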
def run(batch_id,
        source_file_name,
        output_file_name,
        reference_datetime=today_datetime):
    case_status = ['Escalated', 'Waiting on Customer', 'On Hold', 'Working']

    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c',
        'ClosedDate__c', 'First_Contact_Close__c', 'Status'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    data_gen.rename_column('Owner.External_Id__c', 'CreatedById__c')

    data_gen.add_formula_column('External_Id__c', '')

    data_gen.add_constant_column('Field__c', 'created')
    data_gen.add_constant_column('OldValue__c', '')
    data_gen.add_constant_column('NewValue__c', '')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    current_count = 1
    new_rows = []
    row_count = len(data_gen.rows)
    for i in range(row_count):
        row = data_gen.rows.pop()
        column_values = data_gen.row_to_column_values(row)

        column_values['External_Id__c'] = 'W_CaseHistory.' + str(current_count)
        current_count += 1
        case_id = column_values['Case.External_Id__c']
        created_by = column_values['CreatedById__c']
        created_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        closed_date = dateutil.parser.parse(column_values['ClosedDate__c'])
        if closed_date > reference_datetime:
            closed_date = reference_datetime

        first_contact_close = column_values['First_Contact_Close__c']
        status = column_values['Status']

        # include initial created row
        new_rows.append(data_gen.column_values_to_row(column_values))

        # include new status
        new_column_values = {
            'External_Id__c': 'W_CaseHistory.' + str(current_count),
            'Case.External_Id__c': case_id,
            'CreatedById__c': created_by,
            'CreatedDate__c': created_date,
            'Field__c': 'Status',
            'OldValue__c': '',
            'NewValue__c': 'New',
            'ClosedDate__c': '',
            'First_Contact_Close__c': '',
            'analyticsdemo_batch_id__c': batch_id
        }
        new_rows.append(data_gen.column_values_to_row(new_column_values))
        current_count += 1

        old_value = 'New'
        next_event_date = created_date

        while next_event_date <= closed_date:
            next_event_date = next_event_date + timedelta(days=randint(0, 30))

            if first_contact_close == 'true' and status == 'Closed':
                next_event_date = closed_date
                new_column_values = {
                    'External_Id__c': 'W_CaseHistory.' + str(current_count),
                    'Case.External_Id__c': case_id,
                    'CreatedById__c': created_by,
                    'CreatedDate__c': next_event_date,
                    'Field__c': 'Status',
                    'OldValue__c': old_value,
                    'NewValue__c': 'Closed',
                    'ClosedDate__c': '',
                    'First_Contact_Close__c': '',
                    'analyticsdemo_batch_id__c': batch_id
                }
                new_rows.append(
                    data_gen.column_values_to_row(new_column_values))
                current_count += 1
                break
            elif next_event_date >= closed_date:
                next_event_date = closed_date
                new_column_values = {
                    'External_Id__c': 'W_CaseHistory.' + str(current_count),
                    'Case.External_Id__c': case_id,
                    'CreatedById__c': created_by,
                    'CreatedDate__c': next_event_date,
                    'Field__c': 'Status',
                    'OldValue__c': old_value,
                    'NewValue__c': status,
                    'ClosedDate__c': '',
                    'First_Contact_Close__c': '',
                    'analyticsdemo_batch_id__c': batch_id
                }
                new_rows.append(
                    data_gen.column_values_to_row(new_column_values))
                current_count += 1
                break
            else:
                new_value = case_status[randint(0, len(case_status) - 1)]

                while old_value == new_value:
                    new_value = case_status[randint(0, len(case_status) - 1)]

                new_column_values = {
                    'External_Id__c': 'W_CaseHistory.' + str(current_count),
                    'Case.External_Id__c': case_id,
                    'CreatedById__c': created_by,
                    'CreatedDate__c': next_event_date,
                    'Field__c': 'Status',
                    'OldValue__c': old_value,
                    'NewValue__c': new_value,
                    'ClosedDate__c': '',
                    'First_Contact_Close__c': '',
                    'analyticsdemo_batch_id__c': batch_id
                }
                new_rows.append(
                    data_gen.column_values_to_row(new_column_values))
                old_value = new_value
                current_count += 1

    data_gen.rows = new_rows
    data_gen.reverse()

    output_columns = [
        'External_Id__c', 'Case.External_Id__c', 'CreatedById__c',
        'CreatedDate__c', 'Field__c', 'OldValue__c', 'NewValue__c',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
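
# --- illustrative sketch (added; not part of the original example) ---
# A condensed version of the status walk above: start at 'New', emit a
# random status change every 0-30 days, never repeating the previous value,
# and finish with the case's final status once the close date is reached.
# Dates and the final status are hypothetical; stdlib random keeps the
# sketch self-contained.
from datetime import datetime, timedelta
from random import choice, randint

statuses = ['Escalated', 'Waiting on Customer', 'On Hold', 'Working']
created, closed, final_status = datetime(2021, 1, 1), datetime(2021, 3, 1), 'Closed'

history = [('', 'New', created)]          # (old value, new value, when)
old, when = 'New', created
while when <= closed:
    when = when + timedelta(days=randint(0, 30))
    if when >= closed:
        history.append((old, final_status, closed))
        break
    new = choice([s for s in statuses if s != old])
    history.append((old, new, when))
    old = new
print(history)
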
Example #25
def run(batch_id, source_file_name, output_file_name, shape_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['External_Id__c', 'Owner.External_Id__c']
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.unique()

    # rename columns
    data_gen.rename_column('External_Id__c', 'Account.External_Id__c')

    data_gen.add_formula_column(
        'External_Id__c',
        lambda: 'W_Services_Opportunity.' + str(data_gen.current_row + 1))

    stages = [
        'Qualification', 'Needs Analysis', 'Proposal/Quote', 'Negotiation',
        'Closed Won', 'Closed Lost'
    ]
    data_gen.add_formula_column(
        'StageName', lambda: choice(stages, p=[.25, .20, .15, .10, .15, .15]))

    types = ['New Business', 'Add-On Business', 'Services', 'Renewal']
    data_gen.add_formula_column('Type',
                                lambda: choice(types, p=[.45, .27, .18, .10]))

    products = [
        "GC20002", "GC5000 series", "GC10001", "GC50000", "GC1000 series"
    ]
    data_gen.add_formula_column('Products__c', products)

    data_gen.add_formula_column('Amount',
                                lambda: 1000 * int(normal(1400, 350)))

    data_gen.add_formula_column(
        'Name',
        lambda cv: 'New Opportunity [' + str(data_gen.current_row + 1) + ']')

    # load shape data as dataset
    shape_columns = [
        'Account.External_Id__c', 'CreatedDate__c', 'LastActivityDate__c'
    ]
    shape_dataset = data_gen.load_dataset('shape', shape_file_name,
                                          shape_columns)

    # build map of account values
    shape_account_map = shape_dataset.group_by('Account.External_Id__c')

    # generate earliest created date
    def create_date_formula(column_values):
        accounts = shape_account_map.get(
            column_values['Account.External_Id__c'])
        create_dates = [
            dateutil.parser.parse(account['CreatedDate__c'])
            for account in accounts
        ]
        create_dates.sort()
        return (create_dates[0] -
                timedelta(days=randint(1, 45))).isoformat(sep=' ')

    data_gen.add_formula_column('DateTimeCreated__c', create_date_formula)

    # generate last activity date
    def last_activity_date_formula(column_values):
        accounts = shape_account_map.get(
            column_values['Account.External_Id__c'])
        activity_dates = [
            dateutil.parser.parse(account['LastActivityDate__c'])
            for account in accounts
        ]
        activity_dates.sort(reverse=True)
        return activity_dates[0].isoformat(sep=' ')

    data_gen.add_formula_column('LastActivityDate__c',
                                last_activity_date_formula)

    data_gen.add_copy_column('CloseDate', 'DateTimeCreated__c')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'Account.External_Id__c',
        'DateTimeCreated__c', 'CloseDate', 'LastActivityDate__c', 'Name',
        'Products__c', 'StageName', 'Amount', 'Type',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
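
# --- illustrative sketch (added; not part of the original example) ---
# DateTimeCreated__c above backdates each opportunity 1-45 days before the
# earliest CreatedDate__c among its account's shape rows. A standalone
# version with hypothetical shape data:
from datetime import timedelta
from random import randint
import dateutil.parser

shape_rows = [{'CreatedDate__c': '2021-03-05 10:00:00'},
              {'CreatedDate__c': '2021-02-20 09:00:00'}]
earliest = min(dateutil.parser.parse(r['CreatedDate__c']) for r in shape_rows)
backdated = earliest - timedelta(days=randint(1, 45))
print(backdated.isoformat(sep=' '))
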
def run(batch_id,
        source_file_name,
        output_file_name,
        reference_datetime=today,
        id_offset=0):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c',
        'LastActivityDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('External_Id__c', 'What.External_Id__c')
    data_gen.rename_column('LastActivityDate__c', 'ActivityDate')

    # generate a random number of events per opportunity
    data_gen.duplicate_rows(duplication_factor=lambda: randint(1, 3))

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Event.' + str(id_offset + data_gen.current_row + 1))

    data_gen.add_formula_column('Subject', formula=event.event_subject)
    data_gen.add_formula_column('EventSubtype', formula=event.event_subtype)
    data_gen.add_formula_column('DurationInMinutes',
                                formula=event.event_call_duration)

    is_first = True

    def create_date_formula(column_values):
        oppty_create_date = dateutil.parser.parse(
            column_values['CreatedDate__c'])
        oppty_last_activity_date = dateutil.parser.parse(
            column_values['ActivityDate'])
        nonlocal is_first
        if is_first:
            create_date = oppty_last_activity_date
        else:
            create_date = fake.date_time_between_dates(
                oppty_create_date, oppty_last_activity_date)
        is_first = False
        if create_date > reference_datetime:
            create_date = reference_datetime
        return create_date.isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    def activity_date_formula(column_values):
        create_date = dateutil.parser.parse(
            column_values['CreatedDate__c']).date()
        return (create_date + timedelta(days=randint(0, 14))).isoformat()

    data_gen.add_formula_column('ActivityDate', activity_date_formula)

    def activity_datetime_formula(column_values):
        return dateutil.parser.parse(column_values['ActivityDate'])

    data_gen.add_formula_column('ActivityDateTime', activity_datetime_formula)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write
    data_gen.apply_transformations()
    data_gen.write(output_file_name)
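
# --- illustrative sketch (added; not part of the original example) ---
# create_date_formula above keeps state between calls through a closure: a
# nonlocal flag makes the very first generated row reuse the last activity
# date, while every later row draws a date inside the range. A minimal
# standalone version of the pattern, with a deterministic stand-in instead
# of a random draw:
def make_formula():
    is_first = True

    def formula(low, high):
        nonlocal is_first
        if is_first:
            is_first = False
            return high
        return (low + high) / 2  # stand-in for a random pick in [low, high]

    return formula

f = make_formula()
print(f(0, 10))  # 10  -> first call pins to the upper bound
print(f(0, 10))  # 5.0 -> later calls fall inside the range
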
Example #27
def run(batch_id,
        source_file_name,
        output_file_name,
        reference_datetime=today):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    data_gen.rename_column('Owner.External_Id__c', 'User.External_Id__c')

    data_gen.duplicate_rows(duplication_factor=lambda: choice(
        [1, 2, 3, 4, 5], p=[.65, .15, .10, .05, .05]))

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_AgentWork.' + str(data_gen.current_row + 1))

    data_gen.add_copy_column('RequestDateTime__c', 'CreatedDate__c')

    def created_date_formula(column_values):
        created_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        closed_date = dateutil.parser.parse(column_values['ClosedDate__c'])
        if closed_date > reference_datetime:
            closed_date = reference_datetime
        mid_date = created_date + (closed_date - created_date) / 2
        return fake.date_time_between_dates(created_date,
                                            mid_date).isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', created_date_formula)

    def assigned_date_formula(column_values):
        created_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        return (created_date +
                timedelta(seconds=randint(0, 120))).isoformat(sep=' ')

    data_gen.add_formula_column('AssignedDateTime__c', assigned_date_formula)

    def accept_date_formula(column_values):
        assigned_date = dateutil.parser.parse(
            column_values['AssignedDateTime__c'])
        return (assigned_date +
                timedelta(seconds=randint(30, 600))).isoformat(sep=' ')

    data_gen.add_formula_column('AcceptDateTime__c', accept_date_formula)

    def close_date_formula(column_values):
        accept_date = dateutil.parser.parse(column_values['AcceptDateTime__c'])
        return (accept_date +
                timedelta(seconds=randint(30, 1800))).isoformat(sep=' ')

    data_gen.add_formula_column('CloseDateTime__c', close_date_formula)

    def active_time_formula(column_values):
        accept_date = dateutil.parser.parse(column_values['AcceptDateTime__c'])
        close_date = dateutil.parser.parse(column_values['CloseDateTime__c'])
        return int((close_date - accept_date).total_seconds())

    data_gen.add_formula_column('ActiveTime__c', active_time_formula)

    data_gen.add_formula_column('AgentCapacityWhenDeclined__c',
                                lambda: randint(30, 1800))

    def cancel_date_formula(column_values):
        assigned_date = dateutil.parser.parse(
            column_values['AssignedDateTime__c'])
        return (assigned_date +
                timedelta(seconds=randint(30, 600))).isoformat(sep=' ')

    data_gen.add_formula_column('CancelDateTime__c', cancel_date_formula)

    data_gen.add_formula_column('CapacityPercentage__c',
                                lambda: randint(1, 101))

    data_gen.add_formula_column('CapacityWeight__c', lambda: randint(1, 7))

    def decline_date_formula(column_values):
        assigned_date = dateutil.parser.parse(
            column_values['AssignedDateTime__c'])
        return (assigned_date +
                timedelta(seconds=randint(30, 600))).isoformat(sep=' ')

    data_gen.add_formula_column('DeclineDateTime__c', decline_date_formula)

    data_gen.add_formula_column('DeclineReason__c', formula=fake.sentence)

    data_gen.add_copy_column('HandleTime__c', 'ActiveTime__c')

    data_gen.add_formula_column('OriginalQueue.DeveloperName', [
        'GeneralQueue', 'InternationalQueue', 'Knowledge_Translations',
        'Social_Queue', 'TargetCampaign', 'Tier1Queue', 'Tier2Queue',
        'Tier3Queue'
    ])

    data_gen.add_formula_column('PushTimeout__c', lambda: randint(0, 100))

    def push_timeout_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        return create_date + timedelta(seconds=column_values['PushTimeout__c'])

    data_gen.add_formula_column('PushTimeoutDateTime__c',
                                push_timeout_date_formula)

    data_gen.add_formula_column(
        'ServiceChannel.DeveloperName',
        ['Cases', 'LiveMessage', 'sfdc_liveagent', 'Leads'])

    def speed_to_answer_formula(column_values):
        request_date = dateutil.parser.parse(
            column_values['RequestDateTime__c'])
        accept_date = dateutil.parser.parse(column_values['AcceptDateTime__c'])
        return int((accept_date - request_date).total_seconds())

    data_gen.add_formula_column('SpeedToAnswer__c', speed_to_answer_formula)

    data_gen.add_formula_column('Status__c', [
        'Assigned', 'Unavailable', 'Declined', 'Opened', 'Closed',
        'DeclinedOnPushTimeout', 'Canceled'
    ])

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    def filter_func(column_values):
        created_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        cutoff_date = reference_datetime - timedelta(days=60)
        return column_values['Origin'] == 'Chat' and created_date >= cutoff_date

    data_gen.filter(filter_function=filter_func)

    data_gen.apply_transformations()

    data_gen.sort_by('RequestDateTime__c')

    output_columns = [
        'External_Id__c', 'RequestDateTime__c', 'CreatedDate__c',
        'AssignedDateTime__c', 'AcceptDateTime__c', 'CloseDateTime__c',
        'ActiveTime__c', 'AgentCapacityWhenDeclined__c', 'CancelDateTime__c',
        'CapacityPercentage__c', 'CapacityWeight__c', 'DeclineDateTime__c',
        'DeclineReason__c', 'HandleTime__c', 'OriginalQueue.DeveloperName',
        'PushTimeout__c', 'PushTimeoutDateTime__c',
        'ServiceChannel.DeveloperName', 'SpeedToAnswer__c', 'Status__c',
        'User.External_Id__c', 'Case.External_Id__c',
        'analyticsdemo_batch_id__c'
    ]
    return data_gen.write(output_file_name, output_columns, 6000)
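
# --- illustrative sketch (added; not part of the original example) ---
# The timestamp columns above form a chain: assigned = created + up to 120s,
# accepted = assigned + 30-600s, closed = accepted + 30-1800s, and the
# duration fields are differences along that chain. A simplified standalone
# version (the request time is taken as the creation time here):
from datetime import datetime, timedelta
from random import randint

request = datetime(2021, 5, 1, 9, 0, 0)
assigned = request + timedelta(seconds=randint(0, 120))
accepted = assigned + timedelta(seconds=randint(30, 600))
closed = accepted + timedelta(seconds=randint(30, 1800))

speed_to_answer = int((accepted - request).total_seconds())
active_time = int((closed - accepted).total_seconds())
print(speed_to_answer, active_time)
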
def run(source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    # find mean and std of profit
    profits = []
    for row in data_gen.rows:
        column_values = data_gen.row_to_column_values(row)
        profits.append(float(column_values['Profit']))


    profit_mean = mean(profits)
    profit_std = std(profits)

    # filter out profits more than 2 std out.
    def filter_func(column_values):
        profit = float(column_values['Profit'])
        z_score = abs((profit - profit_mean) / profit_std)
        return z_score <= 2
    data_gen.filter(filter_function=filter_func)


    store_tier_map = {
        'New York 4': "Tier 1",
        'New York 3': "Tier 1",
        'New York 2': "Tier 1",
        'New York 1': "Tier 1",
        'Chicago 3': "Tier 1",
        'Chicago 2': "Tier 2",
        'Chicago 1': "Tier 2",
        'Boston 2': "Tier 2",
        'Boston 1': "Tier 3"
    }
    data_gen.add_map_column('Tier', 'Store', store_tier_map)


    month_channel_map = {
        'January': 'Chat',
        'February': 'Chat',
        'March': 'Chat',
        'April': 'Chat',
        'May': 'Chat',
        'June': 'Email',
        'July': 'Email',
        'August': 'Facebook',
        'September': 'Phone',
        'October': 'Phone',
        'November': 'Website',
        'December': 'Website'
    }
    data_gen.add_map_column('Origin', 'Month', month_channel_map)


    discount_support_map = {
        '0': 'Free',
        '0.05': 'Free',
        '0.15': 'Basic',
        '0.1': 'Silver',
        '0.2': 'Platinum'
    }
    data_gen.add_map_column('Type_of_Support__c', 'Discount', discount_support_map)


    camp_reason_map = {
        "Bundled": "Documentation",
        "Buy More & Save": "Unknown Failure",
        "Competitor Focus": "Feature Question",
        "Door Buster": "Hardware Question",
        "Friends & Family": "Late Delivery",
        "Local": "Software Question",
        "Paper Circular": "General Question",
        "Regional": "Item Damaged",
        "Social": "Item Damaged"
    }
    data_gen.add_map_column('Reason', 'Marketing Campaign', camp_reason_map)


    city_priority_map = {
        "Boston": "Low",
        "Chicago": "Medium",
        "New York": "High"
    }
    data_gen.add_map_column('Priority', 'City', city_priority_map)


    comp_sla_map = {
        "High": "Violation",
        "Normal": "Compliant",
        "Low": "Compliant"
    }
    data_gen.add_map_column('SLA', 'Competition', comp_sla_map)


    data_gen.add_constant_column('Status', 'Closed')


    sla_first_contact_close_map = {
        'Compliant': lambda: choice(['true', 'false'], p=[.9, .1]),
        'Violation': lambda: choice(['true', 'false'], p=[.7, .3])
    }
    data_gen.add_map_column('First_Contact_Close__c', 'SLA', sla_first_contact_close_map)


    sla_time_open_map = {
        'Compliant': lambda: choice([12, 24, 36, 48], p=[.50, .20, .20, .10]),
        'Violation': lambda: choice([60, 72, 84, 96, 108, 120], p=[.60, .20, .10, .05, .03, .02])
    }
    data_gen.add_map_column('Time_Open__c', 'SLA', sla_time_open_map)


    def region_formula(column_values):
        average_age = float(column_values['Average Age'])
        if average_age < 40:
            return 'West CSR'
        elif average_age < 50:
            return 'Central CSR'
        else:
            return 'East CSR'
    data_gen.add_formula_column('Team__c', region_formula)


    def user_formula(column_values):
        average_age = float(column_values['Average Age'])
        if average_age < 40:
            return 'W_Services_User.' + str(choice([1, 2, 3, 4, 5]))
        elif average_age < 50:
            return 'W_Services_User.' + str(choice([6, 7, 8, 9, 10, 11]))
        else:
            return 'W_Services_User.' + str(choice([12, 13, 14, 15, 16, 17]))
    data_gen.add_formula_column('Owner.External_Id__c', user_formula)


    # generate offer voucher - give vouchers to customers that were unhappy with Video Games or Cables to boost CSAT
    def offer_voucher_formula(column_values):
        csat = float(column_values['Profit Linear'])
        item = column_values['Item']

        if item in ['Video Games', 'Cables']:
            return choice(['true', 'false'], p=[csat/100, (100 - csat) / 100])
        else:
            return 'false'
    data_gen.add_formula_column('Offer_Voucher__c', offer_voucher_formula)


    def send_field_service_formula(column_values):
        csat = float(column_values['Profit Linear'])
        item = column_values['Item']

        if csat >= 80.0 and item == 'Tablet':
            return 'true'
        else:
            return choice(['true', 'false'], p=[.25, .75])
    data_gen.add_formula_column('Send_FieldService__c', send_field_service_formula)

    data_gen.add_map_column('IsEscalated', 'Tier', {'Tier 1': 'true', None: 'false'})

    # generate close date offset
    # random offset covering the last 14 months
    data_gen.add_formula_column('close_date_offset', lambda: randint(1, 30 * 14))


    # generate account id - generate a long tail distribution - cubic function +- randint
    # helper dataset used for account selection
    data_gen.add_dataset('current_account', {'account_id': 0, 'account_count': 0})


    # generate a distribution of account ids
    def account_id_formula(column_values):
        current_account = data_gen.datasets['current_account']
        account_id = current_account['account_id']
        account_count = current_account['account_count']

        if account_count > 0:
            # continue with the current account_id if there are still any to take
            # but first decrement account count
            account_count += -1
            current_account['account_count'] = account_count
        else:
            # use new account id
            account_id += 1
            account_count = int(round(lognormal(1))) + randint(1, 7)

            # update account dataset for next iteration
            account_count += -1
            current_account['account_count'] = account_count
            current_account['account_id'] = account_id
        return 'W_Services_Account.' + str(account_id)
    data_gen.add_formula_column('Account.External_Id__c', account_id_formula)

    def csat_formula(column_values):
        # first rescale csat from the 0-100 score into the 30-100 range
        csat = float(column_values['Profit Linear'])
        new_delta = 70
        csat = (new_delta * csat / 100) + 30
        channel = column_values['Origin']
        is_escalated = column_values['IsEscalated']
        send_field_service = column_values['Send_FieldService__c']
        offer_voucher = column_values['Offer_Voucher__c']

        if is_escalated == 'true':
            if channel == 'Phone':
                csat = csat - 2
            else:
                csat = csat + 2

        if send_field_service == 'true':
            if channel == 'Phone':
                csat = csat - 2
            else:
                csat = csat + 4

        if offer_voucher == 'true':
            if channel == 'Phone':
                csat = csat - 2
            else:
                csat = csat + 4

        return csat
    data_gen.add_formula_column('CSAT__c', formula=csat_formula)

    data_gen.add_map_column('Outlier', 'Outlier', value_map={
        'TRUE': 'true',
        None: 'false'
    })

    data_gen.apply_transformations()


    data_gen.add_map_column('Time_Open__c', 'First_Contact_Close__c', value_map={
        'true': 0,
        None: lambda cv: cv['Time_Open__c']
    })

    data_gen.apply_transformations()

    rename_map = {
        'Item': 'Product_Family_KB__c'
    }
    data_gen.rename_columns(rename_map)

    output_columns = [
        'Origin',
        'Store',
        'Tier',
        'Product_Family_KB__c',
        'Priority',
        'Average Age',
        'Percent Male',
        'SLA',
        'Daily Revenue',
        'Reason',
        'Reg Price',
        'Type_of_Support__c',
        'Price',
        'Quantity',
        'Cost',
        'Profit',
        'CSAT__c',
        'Profit Log',
        'Outlier',
        'Status',
        'First_Contact_Close__c',
        'Time_Open__c',
        'Team__c',
        'Owner.External_Id__c',
        'close_date_offset',
        'Account.External_Id__c',
        'Offer_Voucher__c',
        'Send_FieldService__c',
        'IsEscalated'
    ]
    data_gen.write(output_file_name, output_columns)
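
# --- illustrative sketch (added; not part of the original example) ---
# Earlier in this example, rows are dropped when Profit sits more than two
# standard deviations from the mean. The same z-score test, standalone, with
# hypothetical profits (statistics.pstdev matches numpy's population std):
from statistics import mean, pstdev

profits = [10.0, 11.0, 9.0, 10.0, 12.0, 11.0, 9.0, 10.0, 100.0]
mu, sigma = mean(profits), pstdev(profits)
kept = [p for p in profits if abs((p - mu) / sigma) <= 2]
print(kept)  # the 100.0 outlier is filtered out
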
def run(batch_id, source_file_name, output_file_name, reference_datetime=today, id_offset=0):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c',
        'Owner.External_Id__c',
        'CreatedDate__c',
        'LastActivityDate__c'
    ]

    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('External_Id__c', 'What.External_Id__c')
    data_gen.rename_column('LastActivityDate__c', 'ActivityDate')

    # generate a random number of tasks per opportunity
    data_gen.duplicate_rows(duplication_factor=lambda: randint(1, 3))

    data_gen.add_formula_column('External_Id__c', formula=lambda: 'W_Task.' + str(id_offset + data_gen.current_row + 1))

    data_gen.add_formula_column('TaskSubtype', formula=task.oppty_task_subtype)
    data_gen.add_formula_column('CallDurationInSeconds', formula=task.task_call_duration)
    data_gen.add_formula_column('CallDisposition', formula=task.task_call_disposition)
    data_gen.add_formula_column('CallType', formula=task.task_call_type)
    data_gen.add_formula_column('Status', formula=task.task_status)
    data_gen.add_formula_column('Priority', formula=task.task_priority)

    def create_date_formula(column_values):
        oppty_create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        oppty_last_activity_date = dateutil.parser.parse(column_values['ActivityDate'])
        create_date = fake.date_time_between_dates(oppty_create_date, oppty_last_activity_date)
        if create_date > reference_datetime:
            create_date = reference_datetime
        return create_date.isoformat(sep=' ')
    
    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    # schedule the activity date 0 to 14 days after the task's created date
    def activity_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c']).date()
        return (create_date + timedelta(days=randint(0, 14))).isoformat()
    
    data_gen.add_formula_column('ActivityDate', activity_date_formula)

    data_gen.add_formula_column('Subject', formula=task.task_subject)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write
    data_gen.apply_transformations()
    data_gen.write(output_file_name)
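
# A minimal stdlib-only sketch (illustrative, independent of DataGenerator and
# faker) of the clamp pattern in create_date_formula above: draw a random
# datetime between two bounds, then cap it at a reference so nothing lands in
# the future. clamped_random_datetime is a hypothetical helper name; timedelta
# and randint are the same imports this module already uses in the formulas above.
def clamped_random_datetime(start, end, reference):
    # guard against inverted bounds, then pick a uniform offset in seconds
    span_seconds = max(int((end - start).total_seconds()), 0)
    value = start + timedelta(seconds=randint(0, span_seconds))
    return min(value, reference)
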
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['Account.External_Id__c']
    data_gen.load_source_file(source_file_name, source_columns)

    # rename columns
    data_gen.rename_column('Account.External_Id__c', 'External_Id__c')

    # filter out duplicate data
    data_gen.unique()

    # load shape data as dataset
    shape_columns = [
        'Account.External_Id__c',
        'Owner.External_Id__c'
    ]
    shape_dataset = data_gen.load_dataset('shape', source_file_name, shape_columns)

    # build map of account values
    shape_account_map = shape_dataset.group_by('Account.External_Id__c')

    # helper method to get shape data related to an account
    def get_shape_data(column_values, shape_column_name):
        return shape_account_map.get(column_values['External_Id__c'])[0].get(shape_column_name)

    # generate name
    data_gen.add_formula_column('Name', formula=account.account_name)

    # generate owner
    def owner_formula(column_values):
        return get_shape_data(column_values, 'Owner.External_Id__c')
    data_gen.add_formula_column('Owner.External_Id__c', owner_formula)

    # generate account source
    data_gen.add_formula_column('AccountSource', formula=account.account_source)

    # generate annual revenue
    data_gen.add_formula_column('AnnualRevenue', lambda: 1000 * int(normal(2800, 600)))

    # generate billing street
    data_gen.add_formula_column('BillingStreet', formula=lambda: fake.building_number() + ' ' + fake.street_name())

    # generate billing city
    data_gen.add_formula_column('BillingCity', formula=fake.city)

    # generate billing state
    data_gen.add_formula_column('BillingState', formula=fake.state_abbr)

    # generate billing postal code
    data_gen.add_formula_column('BillingPostalCode', formula=fake.zipcode)

    # generate billing country
    data_gen.add_constant_column('BillingCountry', 'USA')

    # generate industry
    data_gen.add_formula_column('Industry', formula=account.account_industry)

    # generate number of employees
    data_gen.add_formula_column('NumberOfEmployees', lambda: int(normal(150, 35)))

    # generate ownership
    data_gen.add_formula_column('Ownership', formula=account.account_ownership)

    # generate phone
    data_gen.add_formula_column('Phone', formula=fake.phone_number)

    # generate rating
    data_gen.add_formula_column('Rating', formula=account.account_rating)

    # generate type
    data_gen.add_formula_column('Type', formula=account.account_type)

    # generate year started
    data_gen.add_formula_column('YearStarted', formula=account.account_year_started)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()
    data_gen.write(output_file_name)
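
# A minimal pure-Python sketch (illustrative, independent of DataGenerator) of
# the shape-lookup pattern above, assuming group_by returns a dict of
# {key: [row_dict, ...]}: group rows by account id once, then read a column off
# the first row in each group, mirroring get_shape_data.
# build_account_map is a hypothetical helper name.
from collections import defaultdict

def build_account_map(rows, key='Account.External_Id__c'):
    grouped = defaultdict(list)
    for row in rows:
        grouped[row[key]].append(row)
    return grouped

# usage: build_account_map(shape_rows)['W_Account.1'][0]['Owner.External_Id__c']
# parallels shape_account_map.get(...)[0].get('Owner.External_Id__c') above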