Example #1
def run(batch_id, source_file_name, output_file_name, filter_function=None):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    data_gen.add_formula_column(
        'Contact.External_Id__c',
        lambda cv: cv['Account.External_Id__c'].replace(
            'W_Account', 'W_Contact'))

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    if filter_function:
        data_gen.filter(filter_function)

    output_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'Account.External_Id__c',
        'Contact.External_Id__c', 'CreatedDate__c', 'ClosedDate__c',
        'LastActivityDate__c', 'Origin', 'Tier', 'Product_Family_KB__c',
        'Priority', 'SLA', 'Reason', 'Type_of_Support__c', 'CSAT__c', 'Status',
        'First_Contact_Close__c', 'Time_Open__c', 'Team__c',
        'close_date_offset', 'Offer_Voucher__c', 'Send_FieldService__c',
        'IsEscalated', 'MilestoneStatus__c', 'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
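A minimal invocation sketch for the example above. The paths, the batch id, and the 'Status' filter are hypothetical; the only assumption taken from the examples is that batch_id is an opaque string stamped onto every generated row.

from uuid import uuid4

if __name__ == '__main__':
    run(batch_id=str(uuid4()),                      # hypothetical batch id
        source_file_name='data/cases.csv',          # hypothetical input
        output_file_name='output/case_contacts.csv',
        filter_function=lambda cv: cv['Status'] != 'Closed')  # optional filter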
Example #2
    def generate(self, selected_filters=None, columns=None, count=5):
        if selected_filters is None:
            selected_filters = {}
        if columns is None:
            columns = self.get_columns()

        data_gen = DataGenerator()
        data_gen.row_count = count

        if 'gender' in selected_filters:
            if selected_filters['gender'] == 'male':
                data_gen.add_constant_column('Gender', 'Male')
            else:
                data_gen.add_constant_column('Gender', 'Female')
        else:
            data_gen.add_formula_column('Gender', formula=fake.gender)

        def first_name_formula(column_values):
            if column_values['Gender'] == 'Male':
                return fake.first_name_male()
            else:
                return fake.first_name_female()

        data_gen.add_formula_column('First Name', first_name_formula)

        data_gen.add_formula_column('Last Name', formula=fake.last_name)
        data_gen.add_formula_column('Name', lambda cv: cv['First Name'] + ' ' + cv['Last Name'])

        data_gen.apply_transformations()
        return [data_gen.row_to_column_values(r, columns).values()
                for r in data_gen.rows]
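Hypothetical usage of generate() above, assuming an instance (call it generator) of the class this method belongs to, with the Faker-backed helpers (fake.gender, fake.first_name_male, ...) wired up elsewhere:

rows = generator.generate(selected_filters={'gender': 'male'},
                          columns=['First Name', 'Last Name', 'Name'],
                          count=5)
for values in rows:
    print(list(values))  # e.g. ['John', 'Smith', 'John Smith']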
Example #3
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'AccountExternalId__c', 'Owner.External_Id__c',
        'Name', 'Amount', 'StageName', 'LeadSource', 'Type',
        'ForecastCategoryName', 'CloseDate', 'CreatedDate__c',
        'RecordType.DeveloperName', 'LastActivityDate__c', 'Product2Name__c',
        'Product2Family__c', 'Region__c', 'TimeToClose__c',
        'SalesStageCount__c', 'AccountAnnualRevenue__c',
        'AccountNumberOfEmployees__c', 'AccountBookings__c', 'Competitor__c',
        'DealSizeCategory__c', 'Exec_Meeting__c', 'Interactive_Demo__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('AccountExternalId__c', 'Account.External_Id__c')
    data_gen.rename_column('CreatedDate__c', 'DateTimeCreated__c')

    data_gen.add_formula_column(
        'LastModifiedDate__c',
        lambda cv: dateutil.parser.parse(cv['LastActivityDate__c']))

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    data_gen.write(output_file_name)
Example #4
def run(batch_id, source_file_name, output_file_name, source_service_resources,
        source_service_appointments):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    service_resources = data_gen.load_dataset("ServiceResources",
                                              source_service_resources,
                                              ['Id', 'External_ID__c']).dict(
                                                  'Id', 'External_ID__c')
    data_gen.add_map_column('ServiceResource.External_Id__c',
                            'ServiceResourceId', service_resources)

    service_appointments = data_gen.load_dataset(
        "ServiceAppointments", source_service_appointments,
        ['Id', 'External_ID__c']).dict('Id', 'External_ID__c')
    data_gen.add_map_column('ServiceAppointment.External_Id__c',
                            'ServiceAppointmentId', service_appointments)

    data_gen.apply_transformations()

    data_gen.write(output_file_name,
                   columns=[
                       'External_ID__c', 'ServiceResource.External_Id__c',
                       'ServiceAppointment.External_Id__c', 'ActualTravelTime',
                       'EstimatedTravelTime'
                   ])
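add_map_column shows up throughout these examples as a dict-lookup join: a dataset is collapsed to an {Id: External_ID__c} dict and each row's raw id is swapped for its external id. A standalone sketch of that presumed behavior (an illustration, not the library's actual implementation):

def add_map_column_sketch(rows, new_column, source_column, value_map):
    # store value_map[row[source_column]] under new_column for every row
    for row in rows:
        row[new_column] = value_map.get(row[source_column])
    return rows

rows = [{'ServiceResourceId': 'a001'}]   # hypothetical raw id
mapping = {'a001': 'ServiceResource.1'}  # Id -> External_ID__c
print(add_map_column_sketch(rows, 'ServiceResource.External_Id__c',
                            'ServiceResourceId', mapping))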
Example #5
def run(batch_id, source_file_name, output_file_name, source_service_resources, delta=timedelta(days=14)):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    def shift_date(field):
        # shift a date column forward by delta.days - 1, preserving blanks
        def formula(cv):
            if cv[field] == "":
                return ""
            shifted = dateutil.parser.parse(cv[field]) + timedelta(days=delta.days - 1)
            return shifted.replace(tzinfo=None)
        return formula

    data_gen.add_formula_column('Start', shift_date('Start'))
    data_gen.add_formula_column('End', shift_date('End'))

    service_resources = data_gen.load_dataset("ServiceResources", source_service_resources, ['Id', 'External_ID__c']).dict('Id', 'External_ID__c')

    data_gen.add_map_column('Resource.External_Id__c', 'ResourceId', service_resources)

    data_gen.apply_transformations()

    # copy in a second pass so CreatedDate__c picks up the already-shifted Start
    data_gen.add_copy_column('CreatedDate__c', 'Start')

    data_gen.apply_transformations()

    data_gen.write(output_file_name, columns=[
        'External_ID__c',
        'Resource.External_Id__c',
        'CreatedDate__c',
        'Start',
        'End',
        'Type',
        #'State',
        #'Country',
        #'City'
    ])
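The shift above in isolation: with the default delta=timedelta(days=14), every non-empty Start/End moves forward by delta.days - 1 = 13 days and loses its timezone.

import dateutil.parser
from datetime import timedelta

delta = timedelta(days=14)
start = dateutil.parser.parse('2020-03-01 09:00:00+00:00')
print((start + timedelta(days=delta.days - 1)).replace(tzinfo=None))
# 2020-03-14 09:00:00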
Example #6
def run(batch_id,
        source_file_name,
        output_file_name,
        source_operating_hours,
        reference_datetime=today_datetime):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    data_gen.add_constant_column('CreatedDate__c',
                                 reference_datetime.isoformat(sep=' '))

    operating_hours = data_gen.load_dataset("OperatingHours",
                                            source_operating_hours,
                                            ['Id', 'External_ID__c']).dict(
                                                'Id', 'External_ID__c')

    data_gen.add_map_column('OperatingHours.External_Id__c',
                            'OperatingHoursId', operating_hours)

    data_gen.apply_transformations()

    data_gen.write(output_file_name,
                   columns=[
                       'External_ID__c', 'OperatingHours.External_Id__c',
                       'StartTime', 'EndTime'
                   ])
Example #7
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['KnowledgeArticle.External_Id__c']
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('KnowledgeArticle.External_Id__c',
                           'Parent.External_Id__c')

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_KCSArticle_DCS.' + str(data_gen.current_row + 1))

    data_gen.add_constant_column('DataCategoryGroupName__c', 'All')

    data_gen.add_constant_column('DataCategoryName__c', 'All')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c', 'Parent.External_Id__c', 'DataCategoryGroupName__c',
        'DataCategoryName__c', 'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
Example #8
def run(batch_id, source_file_name, output_file_name, reference_date=today):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['External_Id__c', 'Name', 'UserRole.Name']
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.filter(lambda cv: 'RVP' not in cv['UserRole.Name'])
    data_gen.filter(
        lambda cv: 'CSM' not in cv['UserRole.Name'])  # comes from Service

    data_gen.rename_column('External_Id__c', 'QuotaOwner_Id__c')
    data_gen.rename_column('Name', 'OwnerName__c')

    # generate id
    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Quota.' + str(data_gen.current_row + 1))

    data_gen.duplicate_rows(24)

    def quota_formula():
        # monthly quota cycles through each quarter:
        # first month = 300k, second month = 750k, third month = 500k
        month_in_quarter = data_gen.current_row % 3
        if month_in_quarter == 0:
            return 300000
        elif month_in_quarter == 1:
            return 750000
        else:
            return 500000

    data_gen.add_formula_column('QuotaAmount__c', quota_formula)

    current_year = reference_date.year
    last_year = current_year - 1

    def start_date_formula():
        user_row = data_gen.current_row % 24
        month = str((user_row % 12) + 1).zfill(2)
        day = '01'
        if user_row < 12:
            year = str(last_year)
        else:
            year = str(current_year)
        return dateutil.parser.parse(year + '-' + month + '-' + day).date()

    data_gen.add_formula_column('StartDate__c', start_date_formula)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()
    data_gen.write(output_file_name, [
        'External_Id__c', 'QuotaOwner_Id__c', 'OwnerName__c', 'StartDate__c',
        'QuotaAmount__c'
    ])
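The row arithmetic above in isolation: duplicate_rows(24) gives each user 24 consecutive rows, rows 0-11 fall in last_year and rows 12-23 in current_year, and the quota cycles 300k/750k/500k through each quarter.

for user_row in range(6):  # first six of a user's 24 rows
    month = (user_row % 12) + 1
    quota = [300000, 750000, 500000][user_row % 3]
    print(month, quota)
# 1 300000, 2 750000, 3 500000, 4 300000, 5 750000, 6 500000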
Example #9
def run(batch_id, source_file_name, output_file_name, products_file_name, pricebook_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['External_Id__c', 'Product2Name__c', 'Amount']
    data_gen.load_source_file(source_file_name, source_columns)

    # load datasets
    products = data_gen.load_dataset('products', products_file_name)
    products_by_name = products.group_by('Name')

    pricebook = data_gen.load_dataset('pricebook', pricebook_file_name)
    pricebook_by_product = pricebook.group_by('Product2.External_Id__c')

    # rename columns
    data_gen.rename_column('External_Id__c', 'Opportunity.External_Id__c')
    data_gen.rename_column('Amount', 'TotalPrice')

    data_gen.add_formula_column('External_Id__c', formula=lambda: 'W_OpportunityLineItem.' + str(data_gen.current_row + 1))

    # transform product name to code
    data_gen.add_formula_column('ProductCode', lambda cv: products_by_name[cv['Product2Name__c']][0]['ProductCode'])

    # generate product reference id
    data_gen.add_formula_column('Product2.External_Id__c',
                                lambda cv: products_by_name[cv['Product2Name__c']][0]['External_Id__c'])

    # generate list price
    data_gen.add_formula_column('ListPrice', lambda cv: pricebook_by_product[cv['ProductCode']][0]['UnitPrice'])

    # generate pricebook reference id
    data_gen.add_formula_column('PricebookEntry.External_Id__c',
                                lambda cv: pricebook_by_product[cv['ProductCode']][0]['External_Id__c'])

    # generate quantity
    def quantity_formula(column_values):
        total_price = int(column_values['TotalPrice'])
        list_price = int(column_values['ListPrice'])
        quantity = total_price / list_price
        if quantity <= 0:
            quantity = 1
        return ceil(quantity)
    data_gen.add_formula_column('Quantity', quantity_formula)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c',
        'Opportunity.External_Id__c',
        'TotalPrice',
        'PricebookEntry.External_Id__c',
        'Quantity',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
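A worked instance of the quantity formula above, with made-up numbers: a TotalPrice of 2500 against a ListPrice of 400 gives 2500 / 400 = 6.25, rounded up to 7 units.

from math import ceil
print(ceil(2500 / 400))  # 7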
Example #10
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'KnowledgeArticle.External_Id__c', 'User.External_Id__c',
        'CreatedDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('KnowledgeArticle.External_Id__c',
                           'KCSArticle__ka.External_Id__c')
    data_gen.rename_column('User.External_Id__c', 'Owner.External_Id__c')

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_KCSArticleVersion.' + str(data_gen.current_row + 1))

    data_gen.add_formula_column('ArticleNumber__c',
                                lambda: data_gen.current_row + 1)

    data_gen.add_formula_column('PublishStatus__c', ['Archived', 'Online'])

    data_gen.add_constant_column('IsLatestVersion__c', 'true')
    data_gen.add_constant_column('IsVisibleInApp__c', 'true')
    data_gen.add_constant_column('IsVisibleInCsp__c', 'true')
    data_gen.add_constant_column('IsVisibleInPkb__c', 'true')
    data_gen.add_constant_column('IsVisibleInPrm__c', 'true')

    data_gen.add_constant_column('VersionNumber__c', '1')
    data_gen.add_constant_column('Language__c', 'en_US')

    titles = [
        "Health", "Computers", "Music", "Tools", "Home", "Outdoors",
        "Jewelery", "Toys", "Grocery", "Clothing", "Games", "Automotive",
        "Beauty", "Garden", "Books", "Industrial", "Baby", "Kids", "Movies",
        "Sports", "Shoes", "Electronics"
    ]
    data_gen.add_formula_column('Title__c', titles)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c', 'ArticleNumber__c', 'CreatedDate__c',
        'Owner.External_Id__c', 'PublishStatus__c', 'IsLatestVersion__c',
        'IsVisibleInApp__c', 'IsVisibleInCsp__c', 'IsVisibleInPkb__c',
        'IsVisibleInPrm__c', 'KCSArticle__ka.External_Id__c', 'Title__c',
        'VersionNumber__c', 'Language__c', 'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
Example #11
def run(source_file_name, prefix, output_file_name):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    def external_id_formula(cv):
        # keep an existing non-empty id; otherwise derive one from the row number
        if 'External_ID__c' in cv and str(cv['External_ID__c']) != "":
            return cv['External_ID__c']
        return prefix + '.' + str(data_gen.current_row + 1 + 100)

    data_gen.add_formula_column('External_ID__c', external_id_formula)

    data_gen.apply_transformations()

    # write to new path
    data_gen.write(output_file_name)
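The backfill above in isolation: rows that already carry a non-empty External_ID__c keep it, and blanks get prefix + '.' + (row number + 101). The prefix and values here are hypothetical.

prefix = 'W_Asset'
for row_number, existing in enumerate(['A.1', '', 'A.3']):
    value = existing if str(existing) != '' else prefix + '.' + str(row_number + 1 + 100)
    print(value)
# A.1, W_Asset.102, A.3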
Example #12
def run(batch_id, source_file_name, output_file_name, reference_datetime=today_datetime, id_offset=0):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c',
        'Owner.External_Id__c',
        'CreatedDate__c',
        'LastActivityDate__c'
    ]

    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('External_Id__c', 'What.External_Id__c')
    data_gen.rename_column('LastActivityDate__c', 'ActivityDate')

    # generate a random number of tasks per opportunity
    data_gen.duplicate_rows(duplication_factor=lambda: randint(1, 3))

    data_gen.add_formula_column('External_Id__c', formula=lambda: 'W_Task.' + str(id_offset + data_gen.current_row + 1))

    data_gen.add_formula_column('TaskSubtype', formula=task.oppty_task_subtype)
    data_gen.add_formula_column('CallDurationInSeconds', formula=task.task_call_duration)
    data_gen.add_formula_column('CallDisposition', formula=task.task_call_disposition)
    data_gen.add_formula_column('CallType', formula=task.task_call_type)
    data_gen.add_formula_column('Status', formula=task.task_status)
    data_gen.add_formula_column('Priority', formula=task.task_priority)

    def create_date_formula(column_values):
        oppty_create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        oppty_last_activity_date = dateutil.parser.parse(column_values['ActivityDate'])
        create_date = fake.date_time_between_dates(oppty_create_date, oppty_last_activity_date)
        if create_date > reference_datetime:
            create_date = reference_datetime
        return create_date.isoformat(sep=' ')
    
    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    def activity_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c']).date()
        return (create_date + timedelta(days=randint(0, 14))).isoformat()
    
    data_gen.add_formula_column('ActivityDate', activity_date_formula)

    data_gen.add_formula_column('Subject', formula=task.task_subject)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write
    data_gen.apply_transformations()
    data_gen.write(output_file_name)
Example #13
def run(input_path, output_path, config_source):
    configs = json.loads(file_to_string(config_source))
    time_shifting_file = configs.get('timeShiftingPivot').get('fileName')
    time_shifting_field = configs.get('timeShiftingPivot').get('fieldName')

    data_gen = DataGenerator()

    data_gen.load_source_file(input_path + time_shifting_file,
                              time_shifting_field)
    if time_shifting_field != 'LastProcessedDate':
        aux_date = max([x[0] for x in data_gen.rows])[:10]
    else:
        # LastProcessedDate is a constant column, do not iterate through all rows
        aux_date = data_gen.rows[0][0][:10]

    delta_to_increase = (today -
                         datetime.strptime(aux_date, "%Y-%m-%d").date()).days

    def aux_date_formula(dateToShift):
        def date_formula(column_values):
            if column_values[dateToShift] == "":
                return ""  # preserve blanks (previously fell through and returned None)
            create_date = dateutil.parser.parse(column_values[dateToShift])
            shifted = create_date + timedelta(days=delta_to_increase)
            value_len = len(column_values[dateToShift])
            if value_len == 19:
                return shifted.strftime('%Y-%m-%d %H:%M:%S')
            elif value_len < 24:
                return shifted.strftime('%Y-%m-%d')
            else:
                return shifted.strftime('%Y-%m-%dT%H:%M:%S.000Z')

        data_gen.add_formula_column(dateToShift, date_formula)

    if not output_path:
        output_path = 'output/'

    for input_file in configs.get('inputFiles'):

        file_name = input_file.get('fileName')
        date_fields = input_file.get('dateFields', [])
        print("Timeshifting process for ", file_name, " will start ...")
        data_gen.load_source_file(input_path + file_name)

        data_gen.add_constant_column('LastProcessedDate', today.isoformat())

        for dateToShift in date_fields:
            aux_date_formula(dateToShift)

        data_gen.apply_transformations()
        data_gen.write(output_path + file_name)
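A plausible shape for config_source, inferred only from the keys this function reads (timeShiftingPivot.fileName/fieldName and inputFiles[].fileName/dateFields); the real schema may carry more fields, and the file names are hypothetical.

import json

example_config = '''
{
  "timeShiftingPivot": {"fileName": "opportunities.csv", "fieldName": "CloseDate"},
  "inputFiles": [
    {"fileName": "opportunities.csv", "dateFields": ["CloseDate", "CreatedDate__c"]},
    {"fileName": "tasks.csv", "dateFields": ["ActivityDate"]}
  ]
}
'''
configs = json.loads(example_config)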
Example #14
def run(batch_id, source_file_name, output_file_name, source_accounts):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    accounts = data_gen.load_dataset("Accounts", source_accounts, ['Id', 'External_ID__c']).dict('Id', 'External_ID__c')

    data_gen.add_map_column('Account.External_Id__c', 'AccountId', accounts)

    data_gen.apply_transformations()

    data_gen.write(output_file_name, columns=[
        'External_ID__c',
        'Account.External_Id__c',
        'Subject'
    ])
Example #15
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['KnowledgeArticle.External_Id__c', 'CreatedDate__c']
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('KnowledgeArticle.External_Id__c', 'External_Id__c')

    data_gen.add_formula_column('ArticleNumber__c',
                                lambda: data_gen.current_row + 1)

    data_gen.add_formula_column('CaseAssociationCount__c',
                                lambda: randint(1, 6))

    def first_published_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        return (create_date +
                timedelta(days=randint(1, 10))).isoformat(sep=' ')

    data_gen.add_formula_column('FirstPublishedDate__c',
                                formula=first_published_date_formula)

    def last_published_date_formula(column_values):
        first_published_date = dateutil.parser.parse(
            column_values['FirstPublishedDate__c'])
        return (first_published_date +
                timedelta(days=randint(1, 10))).isoformat(sep=' ')

    data_gen.add_formula_column('LastPublishedDate__c',
                                formula=last_published_date_formula)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'ArticleNumber__c', 'External_Id__c', 'CaseAssociationCount__c',
        'CreatedDate__c', 'FirstPublishedDate__c', 'LastPublishedDate__c',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
Example #16
def run(batch_id, source_file_name, output_file_name, source_products):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    products = data_gen.load_dataset("Products", source_products,
                                     ['Id', 'External_ID__c']).dict(
                                         'Id', 'External_ID__c')
    data_gen.add_map_column('Product2.External_Id__c', 'Product2Id', products)

    data_gen.add_constant_column('Pricebook2.Name', 'Standard Price Book')

    data_gen.apply_transformations()

    data_gen.write(output_file_name,
                   columns=[
                       'External_Id__c', 'Product2.External_Id__c', 'IsActive',
                       'Pricebook2.Name', 'UnitPrice'
                   ])
Example #17
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    account_columns = ['External_Id__c']
    data_gen.load_source_file(source_file_name, account_columns)

    data_gen.rename_column('External_Id__c', 'Account.External_Id__c')
    data_gen.add_formula_column(
        'External_Id__c', lambda cv: cv['Account.External_Id__c'].replace(
            'W_Account', 'W_Contact'))

    data_gen.add_formula_column('FirstName', formula=fake.first_name)
    data_gen.add_formula_column('LastName', formula=fake.last_name)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()
    data_gen.write(output_file_name)
Example #18
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['External_Id__c', 'UserRole.Name']
    data_gen.load_source_file(source_file_name, source_columns)

    # data_gen.filter(lambda cv: 'RVP' in cv['UserRole.Name']) # commented out because using shape file from service with no RVP value in UserRole.Name
    data_gen.filter(lambda cv: 'CSM' in cv['UserRole.Name']) # comes from Service

    data_gen.rename_column('External_Id__c', 'ForecastUser.External_Id__c')

    data_gen.rename_column('UserRole.Name', 'Name')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()
    data_gen.write(output_file_name, ['Name', 'ForecastUser.External_Id__c', 'analyticsdemo_batch_id__c'])
Example #19
def run(batch_id, source_file_name, output_file_name, source_pricebook, source_work_orders):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    pricebook = data_gen.load_dataset("Pricebook", source_pricebook, ['Id', 'External_Id__c']).dict('Id', 'External_Id__c')
    data_gen.add_map_column('PricebookEntry.External_Id__c', 'PricebookEntryId', pricebook)

    work_orders = data_gen.load_dataset("WorkOrders", source_work_orders, ['Id', 'External_ID__c']).dict('Id', 'External_ID__c')
    data_gen.add_map_column('WorkOrder.External_Id__c', 'WorkOrderId', work_orders)

    data_gen.apply_transformations()

    data_gen.write(output_file_name, columns=[
        'External_ID__c',
        'PricebookEntry.External_Id__c',
        'WorkOrder.External_Id__c',
        'QuantityConsumed'
    ])
Example #20
def run(batch_id, source_file_name, output_file_name, source_operating_hours):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    operating_hours = data_gen.load_dataset("OperatingHours", source_operating_hours, ['Id', 'External_ID__c']).dict('Id', 'External_ID__c')

    data_gen.add_map_column('OperatingHours.External_Id__c', 'OperatingHoursId', operating_hours)

    data_gen.apply_transformations()

    data_gen.write(output_file_name, columns=[
        'External_ID__c',
        'Name',
        'OperatingHours.External_Id__c',
        'State',
        'IsActive',
        'Country',
        'City'
    ])
Example #21
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'KnowledgeArticle.External_Id__c',
        'User.External_Id__c',
        'CreatedDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('KnowledgeArticle.External_Id__c', 'Parent.External_Id__c')
    data_gen.rename_column('User.External_Id__c', 'Owner.External_Id__c')

    data_gen.add_formula_column('External_Id__c', formula=lambda: 'W_KCSArticle_ViewStat.' + str(data_gen.current_row + 1))

    channels = [
        'App',
        'Desktop Site',
        'Mobile Site'
    ]
    data_gen.add_formula_column('Channel__c', channels)

    data_gen.add_formula_column('ViewCount__c', formula=lambda: randint(1, 100))

    data_gen.add_formula_column('NormalizedScore__c', formula=lambda: round(uniform(1, 10), 3))
    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)
    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c',
        'Channel__c',
        'Parent.External_Id__c',
        'ViewCount__c',
        'NormalizedScore__c',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
Example #22
def run(input_path, output_path, config_source):
    configs = json.loads(file_to_string(config_source))
    time_shifting_file = configs.get('timeShiftingPivot').get('fileName')
    time_shifting_field = configs.get('timeShiftingPivot').get('fieldName')

    data_gen = DataGenerator()

    data_gen.load_source_file(input_path + time_shifting_file, time_shifting_field)

    aux_date = max([x[0] for x in data_gen.rows])[:10]

    delta_to_increase = (today - datetime.strptime(aux_date, "%Y-%m-%d").date()).days

    def aux_date_formula(dateToShift):
        def date_formula(column_values):
            if column_values[dateToShift] == "":
                return ""  # preserve blanks (previously fell through and returned None)
            create_date = dateutil.parser.parse(column_values[dateToShift])
            shifted = create_date + timedelta(days=delta_to_increase)
            value_len = len(column_values[dateToShift])
            if value_len == 19:
                return shifted.strftime('%Y-%m-%d %H:%M:%S')
            elif value_len < 24:
                return shifted.strftime('%Y-%m-%d')
            else:
                return shifted.strftime('%Y-%m-%dT%H:%M:%S.000Z')

        data_gen.add_formula_column(dateToShift, date_formula)

    if not output_path:
        output_path = 'output/'

    for input_file in configs.get('inputFiles'):

        file_name = input_file.get('fileName')
        date_fields = input_file.get('dateFields', [])

        data_gen.load_source_file(input_path + file_name)

        for dateToShift in date_fields:
            aux_date_formula(dateToShift)
        # register all date formulas, then apply them in a single pass
        # (matches the other time-shifting examples)
        data_gen.apply_transformations()
        data_gen.write(output_path + file_name)
Example #23
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    data_gen.rename_column('Owner.External_Id__c', 'User.External_Id__c')

    # TODO: one case article per case? at most 1? distribution?
    data_gen.duplicate_rows(
        duplication_factor=lambda: choice([0, 1], p=[.75, .25]))

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_CaseArticle.' + str(data_gen.current_row + 1))
    data_gen.add_formula_column(
        'KnowledgeArticle.External_Id__c',
        formula=lambda: 'W_KCSArticle.' + str(data_gen.current_row + 1))

    data_gen.add_constant_column('ArticleVersionNumber__c', 1)

    # a list-valued formula picks one value per row; a constant column would store the list itself
    data_gen.add_formula_column('IsSharedByEmail__c', ['true', 'false'])

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c', 'User.External_Id__c', 'ArticleVersionNumber__c',
        'CreatedDate__c', 'KnowledgeArticle.External_Id__c',
        'IsSharedByEmail__c', 'Case.External_Id__c',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
Example #24
def update(source_file_name, output_file_name, source_work_orders):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    service_appointments = data_gen.load_dataset("WorkOrders", source_work_orders, ['External_ID__c']).dict('External_ID__c', 'External_ID__c')
    service_appointments[None] = 'None'

    data_gen.add_map_column('WorkOrder.External_Id__c', 'WorkOrder.External_Id__c', service_appointments)

    data_gen.apply_transformations()

    data_gen.filter(lambda cv: cv['WorkOrder.External_Id__c'].startswith('WO.'))

    data_gen.apply_transformations()

    data_gen.write(output_file_name, columns=[
        'External_ID__c',
        'PricebookEntry.External_Id__c',
        'WorkOrder.External_Id__c',
        'QuantityConsumed'
    ])
Example #25
def run(batch_id, source_file_name, output_file_name, source_cases, source_accounts, source_work_types, source_service_appointments, reference_datetime=today_datetime):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    cases = data_gen.load_dataset("Cases", source_cases, ['Id', 'External_ID__c']).dict('Id', 'External_ID__c')
    data_gen.add_map_column('Case.External_Id__c', 'CaseId', cases)

    accounts = data_gen.load_dataset("Accounts", source_accounts, ['Id', 'External_ID__c']).dict('Id', 'External_ID__c')
    data_gen.add_map_column('Account.External_Id__c', 'AccountId', accounts)

    work_types = data_gen.load_dataset("WorkTypes", source_work_types, ['Id', 'External_ID__c']).dict('Id', 'External_ID__c')
    data_gen.add_map_column('WorkType.External_Id__c', 'WorkTypeId', work_types)

    data_gen.add_constant_column('Pricebook2.Name', 'Standard Price Book')

    service_appointment_dates = data_gen.load_dataset("ServiceAppointmentDates", source_service_appointments, ['WorkOrder.External_Id__c', 'CreatedDate__c']).dict('WorkOrder.External_Id__c', 'CreatedDate__c')
    service_appointment_dates[None] = reference_datetime + timedelta(days=-1)
    data_gen.add_map_column('CreatedDate__c', 'External_ID__c', service_appointment_dates)

    data_gen.apply_transformations()

    data_gen.filter(lambda cv: cv['WorkType.External_Id__c'].startswith('WT.'))

    data_gen.apply_transformations()

    data_gen.write(output_file_name, columns=[
        'External_ID__c',
        'CreatedDate__c',
        'Status',
        'Pricebook2.Name',
        'Priority',
        'Case.External_Id__c',
        'Account.External_Id__c',
        'WorkType.External_Id__c'
    ])
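The None entry above looks like a default for work orders with no matching appointment date; a sketch of that presumed fallback lookup (the dict access is an assumption about add_map_column's internals):

from datetime import datetime, timedelta

reference_datetime = datetime(2020, 6, 1)
dates = {'WO.1': '2020-05-01 10:00:00'}
dates[None] = reference_datetime + timedelta(days=-1)  # default for unmatched keys
for key in ('WO.1', 'WO.2'):
    print(dates.get(key, dates[None]))
# 2020-05-01 10:00:00, then 2020-05-31 00:00:00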
Example #26
def updateCreatedDate(source_file_name,
                      output_file_name,
                      source_service_appointments,
                      reference_datetime=today_datetime):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    service_appointments = data_gen.load_dataset(
        "ServiceAppointments", source_service_appointments,
        ['External_ID__c']).dict('External_ID__c', 'External_ID__c')
    service_appointments[None] = 'None'

    data_gen.add_map_column('ServiceAppointment.External_Id__c',
                            'ServiceAppointment.External_Id__c',
                            service_appointments)

    data_gen.apply_transformations()

    data_gen.filter(lambda cv: cv['ServiceAppointment.External_Id__c'].
                    startswith('ServiceAppointment'))

    data_gen.apply_transformations()

    service_appointment_dates = data_gen.load_dataset(
        "ServiceAppointmentDates", source_service_appointments,
        ['External_ID__c', 'CreatedDate__c']).dict('External_ID__c',
                                                   'CreatedDate__c')
    service_appointment_dates[None] = reference_datetime + timedelta(days=-1)
    data_gen.add_map_column('CreatedDate__c',
                            'ServiceAppointment.External_Id__c',
                            service_appointment_dates)

    data_gen.apply_transformations()

    data_gen.write(output_file_name,
                   columns=[
                       'External_ID__c', 'ServiceResource.External_Id__c',
                       'ServiceAppointment.External_Id__c', 'CreatedDate__c',
                       'ActualTravelTime', 'EstimatedTravelTime'
                   ])
Example #27
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c',
        'EndTime__c', 'EndedBy__c', 'Status__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('Owner.External_Id__c', 'Agent.External_Id__c')

    data_gen.add_copy_column('LiveChatTranscript.External_Id__c',
                             'External_Id__c')
    data_gen.add_copy_column('Time__c', 'CreatedDate__c')

    data_gen.add_constant_column('Type__c', '')
    data_gen.add_constant_column('Detail__c', '')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    type_detail_map = {
        "ChatRequest": "Visitor requested chat.",
        "ChoiceRoute": "Choice chat request routed to all available qualified agents.",
        "CancelNoAgent": "Chat request canceled because no qualifying agents were available.",
        "Accept": "Chat request accepted by agent.",
        "CancelVisitor": "Visitor clicked Cancel Chat.",
        "LeaveAgent": "Agent left chat.",
        "EndAgent": "Agent clicked End Chat.",
        "LeaveVisitor": "Visitor left chat.",
        "EndVisitor": "Visitor clicked End Chat."
    }

    current_count = 1
    new_rows = []
    row_count = len(data_gen.rows)
    for i in range(row_count):
        row = data_gen.rows.pop()
        column_values = data_gen.row_to_column_values(row)

        live_chat = column_values['LiveChatTranscript.External_Id__c']
        agent = column_values['Agent.External_Id__c']
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        end_date = dateutil.parser.parse(column_values['EndTime__c'])
        ended_by = column_values['EndedBy__c']
        status = column_values['Status__c']

        # initialize chat request
        new_column_values = {
            'External_Id__c': 'W_LiveChatTranscriptEvent.' + str(current_count),
            'LiveChatTranscript.External_Id__c': live_chat,
            'Agent.External_Id__c': agent,
            'CreatedDate__c': create_date.isoformat(sep=' '),
            'Time__c': create_date.isoformat(sep=' '),
            'Type__c': 'ChatRequest',
            'Detail__c': 'Visitor requested chat.',
            'analyticsdemo_batch_id__c': batch_id
        }
        current_count += 1
        new_rows.append(data_gen.column_values_to_row(new_column_values))

        if status == 'Missed':
            type__c = choice(['CancelVisitor', 'CancelNoAgent'])
            if type__c == 'CancelNoAgent':
                # no agents
                create_date = fake.date_time_between_dates(
                    create_date, end_date)
                new_column_values = {
                    'External_Id__c': 'W_LiveChatTranscriptEvent.' + str(current_count),
                    'LiveChatTranscript.External_Id__c': live_chat,
                    'Agent.External_Id__c': agent,
                    'CreatedDate__c': create_date.isoformat(sep=' '),
                    'Time__c': create_date.isoformat(sep=' '),
                    'Type__c': 'ChoiceRoute',
                    'Detail__c': 'Choice chat request routed to all available qualified agents.',
                    'analyticsdemo_batch_id__c': batch_id
                }
                current_count += 1
                new_rows.append(
                    data_gen.column_values_to_row(new_column_values))

                create_date = fake.date_time_between_dates(
                    create_date, end_date)
                new_column_values = {
                    'External_Id__c': 'W_LiveChatTranscriptEvent.' + str(current_count),
                    'LiveChatTranscript.External_Id__c': live_chat,
                    'Agent.External_Id__c': agent,
                    'CreatedDate__c': create_date.isoformat(sep=' '),
                    'Time__c': create_date.isoformat(sep=' '),
                    'Type__c': type__c,
                    'Detail__c': type_detail_map[type__c],
                    'analyticsdemo_batch_id__c': batch_id
                }
                current_count += 1
                new_rows.append(
                    data_gen.column_values_to_row(new_column_values))

                type__c = choice(['LeaveVisitor', 'EndVisitor'])
                new_column_values = {
                    'External_Id__c': 'W_LiveChatTranscriptEvent.' + str(current_count),
                    'LiveChatTranscript.External_Id__c': live_chat,
                    'Agent.External_Id__c': agent,
                    'CreatedDate__c': end_date.isoformat(sep=' '),
                    'Time__c': end_date.isoformat(sep=' '),
                    'Type__c': type__c,
                    'Detail__c': type_detail_map[type__c],
                    'analyticsdemo_batch_id__c': batch_id
                }
                current_count += 1
                new_rows.append(
                    data_gen.column_values_to_row(new_column_values))
            else:
                # visitor canceled
                new_column_values = {
                    'External_Id__c': 'W_LiveChatTranscriptEvent.' + str(current_count),
                    'LiveChatTranscript.External_Id__c': live_chat,
                    'Agent.External_Id__c': agent,
                    'CreatedDate__c': end_date.isoformat(sep=' '),
                    'Time__c': end_date.isoformat(sep=' '),
                    'Type__c': type__c,
                    'Detail__c': type_detail_map[type__c],
                    'analyticsdemo_batch_id__c': batch_id
                }
                current_count += 1
                new_rows.append(
                    data_gen.column_values_to_row(new_column_values))
        else:
            type__c = 'ChoiceRoute'
            new_column_values = {
                'External_Id__c': 'W_LiveChatTranscriptEvent.' + str(current_count),
                'LiveChatTranscript.External_Id__c': live_chat,
                'Agent.External_Id__c': agent,
                'CreatedDate__c': create_date.isoformat(sep=' '),
                'Time__c': create_date.isoformat(sep=' '),
                'Type__c': type__c,
                'Detail__c': type_detail_map[type__c],
                'analyticsdemo_batch_id__c': batch_id
            }
            current_count += 1
            new_rows.append(data_gen.column_values_to_row(new_column_values))

            type__c = 'Accept'
            create_date = fake.date_time_between_dates(create_date, end_date)
            new_column_values = {
                'External_Id__c': 'W_LiveChatTranscriptEvent.' + str(current_count),
                'LiveChatTranscript.External_Id__c': live_chat,
                'Agent.External_Id__c': agent,
                'CreatedDate__c': create_date.isoformat(sep=' '),
                'Time__c': create_date.isoformat(sep=' '),
                'Type__c': type__c,
                'Detail__c': type_detail_map[type__c],
                'analyticsdemo_batch_id__c': batch_id
            }
            current_count += 1
            new_rows.append(data_gen.column_values_to_row(new_column_values))

            if ended_by == 'Visitor':
                type__c = choice(['LeaveVisitor', 'EndVisitor'])
                new_column_values = {
                    'External_Id__c': 'W_LiveChatTranscriptEvent.' + str(current_count),
                    'LiveChatTranscript.External_Id__c': live_chat,
                    'Agent.External_Id__c': agent,
                    'CreatedDate__c': end_date.isoformat(sep=' '),
                    'Time__c': end_date.isoformat(sep=' '),
                    'Type__c': type__c,
                    'Detail__c': type_detail_map[type__c],
                    'analyticsdemo_batch_id__c': batch_id
                }
                current_count += 1
                new_rows.append(
                    data_gen.column_values_to_row(new_column_values))
            else:
                type__c = choice(['LeaveAgent', 'EndAgent'])
                new_column_values = {
                    'External_Id__c': 'W_LiveChatTranscriptEvent.' + str(current_count),
                    'LiveChatTranscript.External_Id__c': live_chat,
                    'Agent.External_Id__c': agent,
                    'CreatedDate__c': end_date.isoformat(sep=' '),
                    'Time__c': end_date.isoformat(sep=' '),
                    'Type__c': type__c,
                    'Detail__c': type_detail_map[type__c],
                    'analyticsdemo_batch_id__c': batch_id
                }
                current_count += 1
                new_rows.append(
                    data_gen.column_values_to_row(new_column_values))

    data_gen.rows = new_rows

    # apply transformations and write file
    output_columns = [
        'External_Id__c', 'LiveChatTranscript.External_Id__c',
        'Agent.External_Id__c', 'Type__c', 'Detail__c', 'CreatedDate__c',
        'Time__c', 'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
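A condensed view of the event sequences the loop above can emit per transcript, which is easier to review than the branch-by-branch dicts:

sequences = {
    ('Missed', 'CancelNoAgent'): ['ChatRequest', 'ChoiceRoute', 'CancelNoAgent',
                                  'LeaveVisitor or EndVisitor'],
    ('Missed', 'CancelVisitor'): ['ChatRequest', 'CancelVisitor'],
    ('Answered', 'ended by Visitor'): ['ChatRequest', 'ChoiceRoute', 'Accept',
                                       'LeaveVisitor or EndVisitor'],
    ('Answered', 'ended by Agent'): ['ChatRequest', 'ChoiceRoute', 'Accept',
                                     'LeaveAgent or EndAgent'],
}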
Example #28
def run(batch_id,
        source_file_name,
        output_file_name,
        reference_date=today_datetime):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'StageName', 'Amount', 'ForecastCategory',
        'CloseDate', 'CreatedDate__c', 'SalesStageCount__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_columns({
        'StageName': 'StageName__c',
        'Amount': 'Amount__c',
        'ForecastCategory': 'ForecastCategory__c',
        'CloseDate': 'CloseDate__c'
    })

    data_gen.add_copy_column('Opportunity.External_Id__c', 'External_Id__c')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    stages = ['Qualification', 'Discovery', 'Proposal/Quote', 'Negotiation']
    forecast_categories = ['BestCase', 'Pipeline', 'Commit']

    pipe_bucket = [
        'No Change', 'Reopen', 'Expand', 'Reduce', 'Moved Out', 'Moved In',
        'Stage Change'
    ]
    pipe_bucket_ratio = [0.10, 0.05, 0.15, 0.15, 0.30, 0.10, 0.15]
    qualification_pipe_bucket = [
        'No Change', 'Reopen', 'Expand', 'Reduce', 'Moved Out', 'Moved In'
    ]
    qualification_pipe_bucket_ratio = [0.20, 0.05, 0.20, 0.10, 0.35, 0.10]
    zero_amount_pipe_bucket = [
        'No Change', 'Reopen', 'Moved Out', 'Moved In', 'Stage Change'
    ]
    zero_amount_pipe_bucket_ratio = [0.20, 0.05, 0.35, 0.10, 0.30]

    current_count = 1
    new_rows = []
    row_count = len(data_gen.rows)
    for i in range(row_count):
        row = data_gen.rows.pop()
        column_values = data_gen.row_to_column_values(row)

        opportunity_id = column_values['Opportunity.External_Id__c']
        close_date = dateutil.parser.parse(column_values['CloseDate__c'])
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        final_amount = int(column_values['Amount__c'])
        final_forecast_category = column_values['ForecastCategory__c']
        final_stage_name = column_values['StageName__c']
        stage_count = int(column_values['SalesStageCount__c'])

        # initialize most recent event date to reference_date or earlier
        event_date_range_start = create_date + (close_date - create_date) / 2
        event_date_range_end = close_date

        if close_date > reference_date:
            event_date_range_end = reference_date
            event_date_range_start = create_date + (reference_date -
                                                    create_date) / 2

        # ensure event happens on or after opportunity create_date
        event_date = fake.date_time_between_dates(event_date_range_start,
                                                  event_date_range_end)

        # create final state
        column_values['CreatedDate__c'] = event_date
        column_values['External_Id__c'] = 'W_OpportunityHistory.' + str(
            current_count)
        current_count += 1
        new_rows.append(data_gen.column_values_to_row(column_values))

        next_create_date = event_date
        next_stage_name = final_stage_name
        next_forecast_category = final_forecast_category
        next_close_date = close_date
        next_amount = final_amount

        movedOut = False
        movedIn = False
        expand = False
        reduce = False
        reopen = False
        initialized = False

        # generate events in reverse order until create_date
        for current_stage_count in range(stage_count):
            # choose the proper bucket depending on the scenario
            bucket = pipe_bucket
            ratio = pipe_bucket_ratio
            if next_amount <= 0:
                bucket = zero_amount_pipe_bucket
                ratio = zero_amount_pipe_bucket_ratio
            elif next_stage_name == 'Qualification':
                bucket = qualification_pipe_bucket
                ratio = qualification_pipe_bucket_ratio

            event = choice(bucket, p=ratio)

            event_date_range_end = event_date
            event_date_range_start = create_date + (event_date -
                                                    create_date) / 2
            event_date = fake.date_time_between_dates(event_date_range_start,
                                                      event_date_range_end)

            # if next stage is closed, make the previous event a stage change
            if 'Closed' in next_stage_name:
                event = 'Stage Change'

            # if the event date is the create date, create the initial state
            if current_stage_count == stage_count - 1:
                event_date = create_date
                event = 'Initial State'

            if event != 'No Change':
                curr_close_date = next_close_date
                curr_amount = next_amount
                curr_stage_name = next_stage_name
                curr_forecast_category = next_forecast_category

                if event == 'Reopen' and not reopen:
                    curr_stage_name = 'Closed Lost'
                    curr_forecast_category = 'Omitted'
                    reopen = True
                elif event == 'Initial State':
                    curr_stage_name = 'Qualification'
                    curr_forecast_category = 'Pipeline'
                    initialized = True
                elif event == 'Expand' and not expand:
                    curr_amount = next_amount - int(
                        uniform(.15, .45) * final_amount)
                    if curr_amount <= 0:
                        # reduce instead
                        curr_amount = next_amount + int(
                            uniform(.15, .45) * final_amount)
                    expand = True
                elif event == 'Reduce' and not reduce:
                    curr_amount = next_amount + int(
                        uniform(.15, .45) * final_amount)
                    reduce = True
                elif event == 'Moved In' and not movedIn:
                    curr_close_date = curr_close_date + timedelta(
                        days=randint(0, 30))
                    movedIn = True
                elif event == 'Moved Out' and not movedOut:
                    curr_close_date = curr_close_date - timedelta(
                        days=randint(30, 90))
                    movedOut = True
                elif event == 'Stage Change':
                    # if next stage is not closed, use previous stage
                    if 'Closed' not in next_stage_name and stages.index(
                            next_stage_name) - 1 > 0:
                        curr_stage_name = stages[stages.index(next_stage_name)
                                                 - 1]
                    # if next stage is closed, use any stage
                    elif 'Closed' in next_stage_name:
                        curr_stage_name = stages[randint(1, len(stages) - 1)]
                    else:
                        curr_stage_name = stages[0]
                    curr_forecast_category = forecast_categories[randint(
                        0,
                        len(forecast_categories) - 1)]

                new_column_values = {
                    'External_Id__c': 'W_OpportunityHistory.' + str(current_count),
                    'Opportunity.External_Id__c': opportunity_id,
                    'StageName__c': curr_stage_name,
                    'Amount__c': curr_amount,
                    'ForecastCategory__c': curr_forecast_category,
                    'CreatedDate__c': event_date.isoformat(sep=' '),
                    'CloseDate__c': curr_close_date.date().isoformat(),
                    'analyticsdemo_batch_id__c': batch_id
                }
                current_count += 1
                new_rows.append(
                    data_gen.column_values_to_row(new_column_values))

                next_stage_name = curr_stage_name
                next_forecast_category = curr_forecast_category
                next_close_date = curr_close_date
                next_amount = curr_amount

    data_gen.rows = new_rows
    data_gen.reverse()

    data_gen.write(output_file_name, [
        'External_Id__c', 'Amount__c', 'StageName__c', 'ForecastCategory__c',
        'CloseDate__c', 'CreatedDate__c', 'Opportunity.External_Id__c',
        'analyticsdemo_batch_id__c'
    ])
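The date windowing above, in isolation: each step samples between the midpoint of (create_date, previous event) and the previous event itself, so events march backwards toward create_date without ever preceding it. A toy trace taking the worst case, where sampling always lands on the window start:

from datetime import datetime

create_date = datetime(2020, 1, 1)
event_date = datetime(2020, 3, 1)
for _ in range(3):
    event_date = create_date + (event_date - create_date) / 2
    print(event_date)
# 2020-01-31 00:00:00, 2020-01-16 00:00:00, 2020-01-08 12:00:00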
Example #29
def run(batch_id,
        source_file_name,
        output_file_name,
        reference_datetime=today_datetime):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    data_gen.rename_column('Owner.External_Id__c', 'User.External_Id__c')

    data_gen.duplicate_rows(duplication_factor=lambda: choice(
        [1, 2, 3, 4, 5], p=[.65, .15, .10, .05, .05]))

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_AgentWork.' + str(data_gen.current_row + 1))

    data_gen.add_copy_column('RequestDateTime__c', 'CreatedDate__c')

    def created_date_formula(column_values):
        created_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        closed_date = dateutil.parser.parse(column_values['ClosedDate__c'])
        if closed_date > reference_datetime:
            closed_date = reference_datetime
        mid_date = created_date + (closed_date - created_date) / 2
        return fake.date_time_between_dates(created_date,
                                            mid_date).isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', created_date_formula)

    def assigned_date_formula(column_values):
        created_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        return (created_date +
                timedelta(seconds=randint(0, 120))).isoformat(sep=' ')

    data_gen.add_formula_column('AssignedDateTime__c', assigned_date_formula)

    def accept_date_formula(column_values):
        assigned_date = dateutil.parser.parse(
            column_values['AssignedDateTime__c'])
        return (assigned_date +
                timedelta(seconds=randint(30, 600))).isoformat(sep=' ')

    data_gen.add_formula_column('AcceptDateTime__c', accept_date_formula)

    def close_date_formula(column_values):
        accept_date = dateutil.parser.parse(column_values['AcceptDateTime__c'])
        return (accept_date +
                timedelta(seconds=randint(30, 1800))).isoformat(sep=' ')

    data_gen.add_formula_column('CloseDateTime__c', close_date_formula)

    def active_time_formula(column_values):
        accept_date = dateutil.parser.parse(column_values['AcceptDateTime__c'])
        close_date = dateutil.parser.parse(column_values['CloseDateTime__c'])
        return int((close_date - accept_date).total_seconds())

    data_gen.add_formula_column('ActiveTime__c', active_time_formula)

    data_gen.add_formula_column('AgentCapacityWhenDeclined__c',
                                lambda: randint(30, 1800))

    def cancel_date_formula(column_values):
        assigned_date = dateutil.parser.parse(
            column_values['AssignedDateTime__c'])
        return (assigned_date +
                timedelta(seconds=randint(30, 600))).isoformat(sep=' ')

    data_gen.add_formula_column('CancelDateTime__c', cancel_date_formula)

    data_gen.add_formula_column('CapacityPercentage__c',
                                lambda: randint(1, 101))

    data_gen.add_formula_column('CapacityWeight__c', lambda: randint(1, 7))

    def decline_date_formula(column_values):
        assigned_date = dateutil.parser.parse(
            column_values['AssignedDateTime__c'])
        return (assigned_date +
                timedelta(seconds=randint(30, 600))).isoformat(sep=' ')

    data_gen.add_formula_column('DeclineDateTime__c', decline_date_formula)

    data_gen.add_formula_column('DeclineReason__c', formula=fake.sentence)

    data_gen.add_copy_column('HandleTime__c', 'ActiveTime__c')

    data_gen.add_formula_column('OriginalQueue.DeveloperName', [
        'GeneralQueue', 'InternationalQueue', 'Knowledge_Translations',
        'Social_Queue', 'TargetCampaign', 'Tier1Queue', 'Tier2Queue',
        'Tier3Queue'
    ])

    data_gen.add_formula_column('PushTimeout__c', lambda: randint(0, 100))

    def push_timeout_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        return create_date + timedelta(seconds=column_values['PushTimeout__c'])

    data_gen.add_formula_column('PushTimeoutDateTime__c',
                                push_timeout_date_formula)

    data_gen.add_formula_column(
        'ServiceChannel.DeveloperName',
        ['Cases', 'LiveMessage', 'sfdc_liveagent', 'Leads'])

    def speed_to_answer_formula(column_values):
        request_date = dateutil.parser.parse(
            column_values['RequestDateTime__c'])
        accept_date = dateutil.parser.parse(column_values['AcceptDateTime__c'])
        return int((accept_date - request_date).total_seconds())

    data_gen.add_formula_column('SpeedToAnswer__c', speed_to_answer_formula)

    data_gen.add_formula_column('Status__c', [
        'Assigned', 'Unavailable', 'Declined', 'Opened', 'Closed',
        'DeclinedOnPushTimeout', 'Canceled'
    ])

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    def filter_func(column_values):
        created_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        cutoff_date = reference_datetime - timedelta(days=60)
        return column_values['Origin'] == 'Chat' and created_date >= cutoff_date

    data_gen.filter(filter_function=filter_func)

    data_gen.apply_transformations()

    data_gen.sort_by('RequestDateTime__c')

    output_columns = [
        'External_Id__c', 'RequestDateTime__c', 'CreatedDate__c',
        'AssignedDateTime__c', 'AcceptDateTime__c', 'CloseDateTime__c',
        'ActiveTime__c', 'AgentCapacityWhenDeclined__c', 'CancelDateTime__c',
        'CapacityPercentage__c', 'CapacityWeight__c', 'DeclineDateTime__c',
        'DeclineReason__c', 'HandleTime__c', 'OriginalQueue.DeveloperName',
        'PushTimeout__c', 'PushTimeoutDateTime__c',
        'ServiceChannel.DeveloperName', 'SpeedToAnswer__c', 'Status__c',
        'User.External_Id__c', 'Case.External_Id__c',
        'analyticsdemo_batch_id__c'
    ]
    return data_gen.write(output_file_name, output_columns, 6000)
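The derived fields above chain into a simple timeline; a toy trace under assumed draws (these examples use numpy-style randint, whose high bound is exclusive), treating the re-sampled CreatedDate__c as equal to the request time for simplicity:

from datetime import datetime, timedelta

request = datetime(2020, 6, 1, 9, 0, 0)           # RequestDateTime__c
assigned = request + timedelta(seconds=45)        # randint(0, 120) draw
accepted = assigned + timedelta(seconds=90)       # randint(30, 600) draw
closed = accepted + timedelta(seconds=600)        # randint(30, 1800) draw
print(int((closed - accepted).total_seconds()))   # ActiveTime__c -> 600
print(int((accepted - request).total_seconds()))  # SpeedToAnswer__c -> 135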
Example #30
def run(input_path, output_path, config_source):
    configs = json.loads(file_to_string(config_source))
    time_shifting_file = configs.get('timeShiftingPivot').get('fileName')
    time_shifting_field = configs.get('timeShiftingPivot').get('fieldName')

    data_gen = DataGenerator()

    data_gen.load_source_file(input_path + time_shifting_file,
                              time_shifting_field)

    aux_date = max([x[0] for x in data_gen.rows])[:10]

    delta_to_increase = (today -
                         datetime.strptime(aux_date, "%Y-%m-%d").date()).days

    def aux_date_formula(dateToShift):
        def date_formula(column_values):
            if column_values[dateToShift] == "":
                return ""  # preserve blanks (previously fell through and returned None)
            create_date = dateutil.parser.parse(column_values[dateToShift])
            shifted = create_date + timedelta(days=delta_to_increase)
            value_len = len(column_values[dateToShift])
            if value_len == 19:
                return shifted.strftime('%Y-%m-%d %H:%M:%S')
            elif value_len < 24:
                return shifted.strftime('%Y-%m-%d')
            else:
                return shifted.strftime('%Y-%m-%dT%H:%M:%S.000Z')

        data_gen.add_formula_column(dateToShift, date_formula)

    current_year = today.year
    map_quota_year = {}

    def quotas_date_formula(dateToShift):
        def date_formula(column_values):
            if column_values[dateToShift] == "":
                return ""  # preserve blanks
            quota_year = column_values[dateToShift][:4]
            return column_values[dateToShift].replace(
                quota_year, map_quota_year[quota_year])

        date_index = data_gen.column_names[dateToShift]
        dates = [e[date_index] for e in data_gen.rows]
        max_year = max(dates)[:4]
        min_year = min(dates)[:4]
        map_quota_year[max_year] = str(current_year)
        map_quota_year[min_year] = str(current_year - 1)

        data_gen.add_formula_column(dateToShift, date_formula)

    if not output_path:
        output_path = 'output/'

    for input_file in configs.get('inputFiles'):

        file_name = input_file.get('fileName')
        date_fields = input_file.get('dateFields', [])
        print("Timeshifting process for ", file_name, " will start ...")
        data_gen.load_source_file(input_path + file_name)

        if file_name not in [
                'FscDemoWeeks.csv', 'WM_Add_Assets_Prediction_Final.csv',
                'WM_Churn_Predictions_Final.csv'
        ]:
            data_gen.add_constant_column('LastProcessedDate',
                                         today.isoformat())

        for dateToShift in date_fields:
            if file_name != 'FscDemoQuota.csv':
                aux_date_formula(dateToShift)
            else:
                quotas_date_formula(dateToShift)
        data_gen.apply_transformations()
        data_gen.write(output_path + file_name)