def run(batch_id, source_file_name, output_file_name, filter_function=None):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    data_gen.add_formula_column(
        'Contact.External_Id__c',
        lambda cv: cv['Account.External_Id__c'].replace('W_Account', 'W_Contact'))

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    if filter_function:
        data_gen.filter(filter_function)

    output_columns = [
        'External_Id__c',
        'Owner.External_Id__c',
        'Account.External_Id__c',
        'Contact.External_Id__c',
        'CreatedDate__c',
        'ClosedDate__c',
        'LastActivityDate__c',
        'Origin',
        'Tier',
        'Product_Family_KB__c',
        'Priority',
        'SLA',
        'Reason',
        'Type_of_Support__c',
        'CSAT__c',
        'Status',
        'First_Contact_Close__c',
        'Time_Open__c',
        'Team__c',
        'close_date_offset',
        'Offer_Voucher__c',
        'Send_FieldService__c',
        'IsEscalated',
        'MilestoneStatus__c',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)

def generate(self, selected_filters=None, columns=None, count=5):
    if selected_filters is None:
        selected_filters = {}
    if columns is None:
        columns = self.get_columns()

    data_gen = DataGenerator()
    data_gen.row_count = count

    if 'gender' in selected_filters:
        if selected_filters['gender'] == 'male':
            data_gen.add_constant_column('Gender', 'Male')
        else:
            data_gen.add_constant_column('Gender', 'Female')
    else:
        data_gen.add_formula_column('Gender', formula=fake.gender)

    def first_name_formula(column_values):
        if column_values['Gender'] == 'Male':
            return fake.first_name_male()
        else:
            return fake.first_name_female()

    data_gen.add_formula_column('First Name', first_name_formula)
    data_gen.add_formula_column('Last Name', formula=fake.last_name)
    data_gen.add_formula_column(
        'Name', lambda cv: cv['First Name'] + ' ' + cv['Last Name'])

    data_gen.apply_transformations()
    return list(map(lambda r: data_gen.row_to_column_values(r, columns).values(),
                    data_gen.rows))

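# A minimal usage sketch for generate() above. The class name
# `FakeNameGenerator` is hypothetical (the enclosing class is not shown here);
# it assumes `fake` is a module-level Faker instance extended with a custom
# `gender` provider, as the formulas above imply.
#
#   gen = FakeNameGenerator()
#   for values in gen.generate(selected_filters={'gender': 'male'}, count=2):
#       print(list(values))
#   # e.g. ['Male', 'James', 'Porter', 'James Porter'], depending on the
#   # column order returned by get_columns()
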
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'AccountExternalId__c', 'Owner.External_Id__c',
        'Name', 'Amount', 'StageName', 'LeadSource', 'Type',
        'ForecastCategoryName', 'CloseDate', 'CreatedDate__c',
        'RecordType.DeveloperName', 'LastActivityDate__c', 'Product2Name__c',
        'Product2Family__c', 'Region__c', 'TimeToClose__c',
        'SalesStageCount__c', 'AccountAnnualRevenue__c',
        'AccountNumberOfEmployees__c', 'AccountBookings__c', 'Competitor__c',
        'DealSizeCategory__c', 'Exec_Meeting__c', 'Interactive_Demo__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('AccountExternalId__c', 'Account.External_Id__c')
    data_gen.rename_column('CreatedDate__c', 'DateTimeCreated__c')

    data_gen.add_formula_column(
        'LastModifiedDate__c',
        lambda cv: dateutil.parser.parse(cv['LastActivityDate__c']))

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()
    data_gen.write(output_file_name)

def run(batch_id, source_file_name, output_file_name, source_service_resources,
        source_service_appointments):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    service_resources = data_gen.load_dataset(
        "ServiceResources", source_service_resources,
        ['Id', 'External_ID__c']).dict('Id', 'External_ID__c')
    data_gen.add_map_column('ServiceResource.External_Id__c',
                            'ServiceResourceId', service_resources)

    service_appointments = data_gen.load_dataset(
        "ServiceAppointments", source_service_appointments,
        ['Id', 'External_ID__c']).dict('Id', 'External_ID__c')
    data_gen.add_map_column('ServiceAppointment.External_Id__c',
                            'ServiceAppointmentId', service_appointments)

    data_gen.apply_transformations()

    data_gen.write(output_file_name, columns=[
        'External_ID__c',
        'ServiceResource.External_Id__c',
        'ServiceAppointment.External_Id__c',
        'ActualTravelTime',
        'EstimatedTravelTime'
    ])

def run(batch_id, source_file_name, output_file_name, source_service_resources,
        delta=timedelta(days=14)):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    data_gen.add_formula_column(
        'Start',
        lambda cv: "" if cv['Start'] == "" else
        (dateutil.parser.parse(cv['Start']) +
         timedelta(days=delta.days - 1)).replace(tzinfo=None))
    data_gen.add_formula_column(
        'End',
        lambda cv: "" if cv['End'] == "" else
        (dateutil.parser.parse(cv['End']) +
         timedelta(days=delta.days - 1)).replace(tzinfo=None))

    service_resources = data_gen.load_dataset(
        "ServiceResources", source_service_resources,
        ['Id', 'External_ID__c']).dict('Id', 'External_ID__c')
    data_gen.add_map_column('Resource.External_Id__c', 'ResourceId',
                            service_resources)

    data_gen.apply_transformations()

    data_gen.add_copy_column('CreatedDate__c', 'Start')
    data_gen.apply_transformations()

    data_gen.write(output_file_name, columns=[
        'External_ID__c',
        'Resource.External_Id__c',
        'CreatedDate__c',
        'Start',
        'End',
        'Type',
        #'State',
        #'Country',
        #'City'
    ])

def run(batch_id, source_file_name, output_file_name, source_operating_hours,
        reference_datetime=today_datetime):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    data_gen.add_constant_column('CreatedDate__c',
                                 reference_datetime.isoformat(sep=' '))

    operating_hours = data_gen.load_dataset(
        "OperatingHours", source_operating_hours,
        ['Id', 'External_ID__c']).dict('Id', 'External_ID__c')
    data_gen.add_map_column('OperatingHours.External_Id__c',
                            'OperatingHoursId', operating_hours)

    data_gen.apply_transformations()

    data_gen.write(output_file_name, columns=[
        'External_ID__c',
        'OperatingHours.External_Id__c',
        'StartTime',
        'EndTime'
    ])

def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['KnowledgeArticle.External_Id__c']
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('KnowledgeArticle.External_Id__c',
                           'Parent.External_Id__c')

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_KCSArticle_DCS.' + str(data_gen.current_row + 1))

    data_gen.add_constant_column('DataCategoryGroupName__c', 'All')
    data_gen.add_constant_column('DataCategoryName__c', 'All')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c',
        'Parent.External_Id__c',
        'DataCategoryGroupName__c',
        'DataCategoryName__c',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)

def run(batch_id, source_file_name, output_file_name, reference_date=today):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['External_Id__c', 'Name', 'UserRole.Name']
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.filter(lambda cv: 'RVP' not in cv['UserRole.Name'])
    data_gen.filter(
        lambda cv: 'CSM' not in cv['UserRole.Name'])  # comes from Service

    data_gen.rename_column('External_Id__c', 'QuotaOwner_Id__c')
    data_gen.rename_column('Name', 'OwnerName__c')

    # generate id
    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Quota.' + str(data_gen.current_row + 1))

    # one quota row per user per month, over two years
    data_gen.duplicate_rows(24)

    def quota_formula():
        # first month of quarter = 300k
        # second month of quarter = 750k
        # third month of quarter = 500k
        month_of_quarter = data_gen.current_row % 3
        if month_of_quarter == 0:
            return 300000
        elif month_of_quarter == 1:
            return 750000
        else:
            return 500000

    data_gen.add_formula_column('QuotaAmount__c', quota_formula)

    current_year = reference_date.year
    last_year = current_year - 1

    def start_date_formula():
        user_row = data_gen.current_row % 24
        month = str((user_row % 12) + 1).zfill(2)
        day = '01'
        if user_row < 12:
            year = str(last_year)
        else:
            year = str(current_year)
        return dateutil.parser.parse(year + '-' + month + '-' + day).date()

    data_gen.add_formula_column('StartDate__c', start_date_formula)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()
    data_gen.write(output_file_name, [
        'External_Id__c', 'QuotaOwner_Id__c', 'OwnerName__c', 'StartDate__c',
        'QuotaAmount__c'
    ])

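# Worked example of the quota schedule above: duplicate_rows(24) turns each
# user row into 24 monthly rows (12 for last_year, then 12 for current_year),
# and QuotaAmount__c cycles 300k / 750k / 500k within each quarter. With a
# reference year of 2021, a single user yields:
#
#   row  0 -> StartDate__c 2020-01-01, QuotaAmount__c 300000
#   row  1 -> StartDate__c 2020-02-01, QuotaAmount__c 750000
#   row  2 -> StartDate__c 2020-03-01, QuotaAmount__c 500000
#   ...
#   row 12 -> StartDate__c 2021-01-01, QuotaAmount__c 300000
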
def run(batch_id, source_file_name, output_file_name, products_file_name,
        pricebook_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['External_Id__c', 'Product2Name__c', 'Amount']
    data_gen.load_source_file(source_file_name, source_columns)

    # load datasets
    products = data_gen.load_dataset('products', products_file_name)
    products_by_name = products.group_by('Name')
    pricebook = data_gen.load_dataset('pricebook', pricebook_file_name)
    pricebook_by_product = pricebook.group_by('Product2.External_Id__c')

    # rename columns
    data_gen.rename_column('External_Id__c', 'Opportunity.External_Id__c')
    data_gen.rename_column('Amount', 'TotalPrice')

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_OpportunityLineItem.' + str(data_gen.current_row + 1))

    # transform product name to code
    data_gen.add_formula_column(
        'ProductCode',
        lambda cv: products_by_name[cv['Product2Name__c']][0]['ProductCode'])

    # generate product reference id
    data_gen.add_formula_column(
        'Product2.External_Id__c',
        lambda cv: products_by_name[cv['Product2Name__c']][0]['External_Id__c'])

    # generate list price
    data_gen.add_formula_column(
        'ListPrice',
        lambda cv: pricebook_by_product[cv['ProductCode']][0]['UnitPrice'])

    # generate pricebook reference id
    data_gen.add_formula_column(
        'PricebookEntry.External_Id__c',
        lambda cv: pricebook_by_product[cv['ProductCode']][0]['External_Id__c'])

    # generate quantity
    def quantity_formula(column_values):
        total_price = int(column_values['TotalPrice'])
        list_price = int(column_values['ListPrice'])
        quantity = total_price / list_price
        if quantity <= 0:
            quantity = 1
        return ceil(quantity)

    data_gen.add_formula_column('Quantity', quantity_formula)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c',
        'Opportunity.External_Id__c',
        'TotalPrice',
        'PricebookEntry.External_Id__c',
        'Quantity',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)

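# Worked example of quantity_formula above: a line item with TotalPrice 2500
# against a ListPrice of 1000 gets Quantity = ceil(2500 / 1000) = 3; a
# TotalPrice of 0 produces a non-positive ratio, which is clamped to a
# minimum Quantity of 1.
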
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'KnowledgeArticle.External_Id__c', 'User.External_Id__c',
        'CreatedDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('KnowledgeArticle.External_Id__c',
                           'KCSArticle__ka.External_Id__c')
    data_gen.rename_column('User.External_Id__c', 'Owner.External_Id__c')

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_KCSArticleVersion.' + str(data_gen.current_row + 1))

    data_gen.add_formula_column('ArticleNumber__c',
                                lambda: data_gen.current_row + 1)
    data_gen.add_formula_column('PublishStatus__c', ['Archived', 'Online'])
    data_gen.add_constant_column('IsLatestVersion__c', 'true')
    data_gen.add_constant_column('IsVisibleInApp__c', 'true')
    data_gen.add_constant_column('IsVisibleInCsp__c', 'true')
    data_gen.add_constant_column('IsVisibleInPkb__c', 'true')
    data_gen.add_constant_column('IsVisibleInPrm__c', 'true')
    data_gen.add_constant_column('VersionNumber__c', '1')
    data_gen.add_constant_column('Language__c', 'en_US')

    titles = [
        "Health", "Computers", "Music", "Tools", "Home", "Outdoors",
        "Jewelery", "Toys", "Grocery", "Clothing", "Games", "Automotive",
        "Beauty", "Garden", "Books", "Industrial", "Baby", "Kids", "Movies",
        "Sports", "Shoes", "Electronics"
    ]
    data_gen.add_formula_column('Title__c', titles)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c', 'ArticleNumber__c', 'CreatedDate__c',
        'Owner.External_Id__c', 'PublishStatus__c', 'IsLatestVersion__c',
        'IsVisibleInApp__c', 'IsVisibleInCsp__c', 'IsVisibleInPkb__c',
        'IsVisibleInPrm__c', 'KCSArticle__ka.External_Id__c', 'Title__c',
        'VersionNumber__c', 'Language__c', 'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)

def run(source_file_name, prefix, output_file_name):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    # keep an existing External_ID__c; otherwise generate one from the prefix
    data_gen.add_formula_column(
        'External_ID__c',
        formula=lambda cv: cv['External_ID__c']
        if 'External_ID__c' in cv and not str(cv['External_ID__c']) == ""
        else prefix + '.' + str(data_gen.current_row + 1 + 100))

    data_gen.apply_transformations()

    # write to new path
    data_gen.write(output_file_name)

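# Example: with a prefix of 'W_WorkOrder' (an illustrative value), a row with
# a blank External_ID__c at current_row 0 is assigned 'W_WorkOrder.101'; rows
# that already carry a non-empty External_ID__c are left untouched. The +100
# offset presumably keeps generated ids clear of ids already present in the
# source file.
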
def run(batch_id, source_file_name, output_file_name,
        reference_datetime=today_datetime, id_offset=0):
    # note: reference_datetime must be a datetime, not a date; it is compared
    # with parsed datetimes below
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c',
        'LastActivityDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('External_Id__c', 'What.External_Id__c')
    data_gen.rename_column('LastActivityDate__c', 'ActivityDate')

    # generate a random number of tasks per opportunity
    data_gen.duplicate_rows(duplication_factor=lambda: randint(1, 3))

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Task.' + str(id_offset + data_gen.current_row + 1))

    data_gen.add_formula_column('TaskSubtype', formula=task.oppty_task_subtype)
    data_gen.add_formula_column('CallDurationInSeconds',
                                formula=task.task_call_duration)
    data_gen.add_formula_column('CallDisposition',
                                formula=task.task_call_disposition)
    data_gen.add_formula_column('CallType', formula=task.task_call_type)
    data_gen.add_formula_column('Status', formula=task.task_status)
    data_gen.add_formula_column('Priority', formula=task.task_priority)

    def create_date_formula(column_values):
        oppty_create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        oppty_last_activity_date = dateutil.parser.parse(column_values['ActivityDate'])
        create_date = fake.date_time_between_dates(oppty_create_date,
                                                   oppty_last_activity_date)
        # clamp so no task is created after the reference datetime
        if create_date > reference_datetime:
            create_date = reference_datetime
        return create_date.isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    def activity_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c']).date()
        return (create_date + timedelta(days=randint(0, 14))).isoformat()

    data_gen.add_formula_column('ActivityDate', activity_date_formula)

    data_gen.add_formula_column('Subject', formula=task.task_subject)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write
    data_gen.apply_transformations()
    data_gen.write(output_file_name)

def run(input_path, output_path, config_source):
    configs = json.loads(file_to_string(config_source))
    time_shifting_file = configs.get('timeShiftingPivot').get('fileName')
    time_shifting_field = configs.get('timeShiftingPivot').get('fieldName')

    data_gen = DataGenerator()
    data_gen.load_source_file(input_path + time_shifting_file,
                              time_shifting_field)

    if time_shifting_field != 'LastProcessedDate':
        aux_date = max([x[0] for x in data_gen.rows])[:10]
    else:
        # LastProcessedDate is a constant column, do not iterate through all rows
        aux_date = data_gen.rows[0][0][:10]

    delta_to_increase = (today -
                         datetime.strptime(aux_date, "%Y-%m-%d").date()).days

    def aux_date_formula(dateToShift):
        def date_formula(column_values):
            if column_values[dateToShift] != "":
                create_date = dateutil.parser.parse(column_values[dateToShift])
                # keep the original granularity of the value being shifted
                if len(column_values[dateToShift]) == 19:
                    return (create_date + timedelta(days=delta_to_increase)
                            ).strftime('%Y-%m-%d %H:%M:%S')
                elif len(column_values[dateToShift]) < 24:
                    return (create_date +
                            timedelta(days=delta_to_increase)).strftime('%Y-%m-%d')
                else:
                    return (create_date + timedelta(days=delta_to_increase)
                            ).strftime('%Y-%m-%dT%H:%M:%S.000Z')
            # leave empty values empty
            return column_values[dateToShift]

        data_gen.add_formula_column(dateToShift, date_formula)

    if not output_path:
        output_path = 'output/'

    for input_file in configs.get('inputFiles'):
        file_name = input_file.get('fileName')
        date_fields = input_file.get('dateFields', [])
        print("Time-shifting", file_name, "...")
        data_gen.load_source_file(input_path + file_name)
        data_gen.add_constant_column('LastProcessedDate', today.isoformat())
        for dateToShift in date_fields:
            aux_date_formula(dateToShift)
        data_gen.apply_transformations()
        data_gen.write(output_path + file_name)

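# A sketch of the JSON config consumed above (the file and field names are
# illustrative, not from the original repo; only the key structure is taken
# from the code):
#
#   {
#     "timeShiftingPivot": { "fileName": "Cases.csv", "fieldName": "CreatedDate__c" },
#     "inputFiles": [
#       { "fileName": "Cases.csv",  "dateFields": ["CreatedDate__c", "ClosedDate__c"] },
#       { "fileName": "Events.csv", "dateFields": ["Start", "End"] }
#     ]
#   }
#
# Every date field is shifted forward by the number of days between the newest
# pivot date and today, so the demo dataset always ends "now".
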
def run(batch_id, source_file_name, output_file_name, source_accounts):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    accounts = data_gen.load_dataset("Accounts", source_accounts,
                                     ['Id', 'External_ID__c']).dict(
                                         'Id', 'External_ID__c')
    data_gen.add_map_column('Account.External_Id__c', 'AccountId', accounts)

    data_gen.apply_transformations()

    data_gen.write(output_file_name, columns=[
        'External_ID__c',
        'Account.External_Id__c',
        'Subject'
    ])

def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['KnowledgeArticle.External_Id__c', 'CreatedDate__c']
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('KnowledgeArticle.External_Id__c', 'External_Id__c')

    data_gen.add_formula_column('ArticleNumber__c',
                                lambda: data_gen.current_row + 1)
    data_gen.add_formula_column('CaseAssociationCount__c',
                                lambda: randint(1, 6))

    def first_published_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        return (create_date + timedelta(days=randint(1, 10))).isoformat(sep=' ')

    data_gen.add_formula_column('FirstPublishedDate__c',
                                formula=first_published_date_formula)

    def last_published_date_formula(column_values):
        first_published_date = dateutil.parser.parse(
            column_values['FirstPublishedDate__c'])
        return (first_published_date +
                timedelta(days=randint(1, 10))).isoformat(sep=' ')

    data_gen.add_formula_column('LastPublishedDate__c',
                                formula=last_published_date_formula)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'ArticleNumber__c', 'External_Id__c', 'CaseAssociationCount__c',
        'CreatedDate__c', 'FirstPublishedDate__c', 'LastPublishedDate__c',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)

def run(batch_id, source_file_name, output_file_name, source_products):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    products = data_gen.load_dataset("Products", source_products,
                                     ['Id', 'External_ID__c']).dict(
                                         'Id', 'External_ID__c')
    data_gen.add_map_column('Product2.External_Id__c', 'Product2Id', products)

    data_gen.add_constant_column('Pricebook2.Name', 'Standard Price Book')

    data_gen.apply_transformations()

    data_gen.write(output_file_name, columns=[
        'External_Id__c',
        'Product2.External_Id__c',
        'IsActive',
        'Pricebook2.Name',
        'UnitPrice'
    ])

def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    account_columns = ['External_Id__c']
    data_gen.load_source_file(source_file_name, account_columns)

    data_gen.rename_column('External_Id__c', 'Account.External_Id__c')

    data_gen.add_formula_column(
        'External_Id__c',
        lambda cv: cv['Account.External_Id__c'].replace('W_Account', 'W_Contact'))

    data_gen.add_formula_column('FirstName', formula=fake.first_name)
    data_gen.add_formula_column('LastName', formula=fake.last_name)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()
    data_gen.write(output_file_name)

def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['External_Id__c', 'UserRole.Name']
    data_gen.load_source_file(source_file_name, source_columns)

    # data_gen.filter(lambda cv: 'RVP' in cv['UserRole.Name'])
    # commented out because using shape file from service with no RVP value in UserRole.Name
    data_gen.filter(lambda cv: 'CSM' in cv['UserRole.Name'])  # comes from Service

    data_gen.rename_column('External_Id__c', 'ForecastUser.External_Id__c')
    data_gen.rename_column('UserRole.Name', 'Name')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()
    data_gen.write(output_file_name,
                   ['Name', 'ForecastUser.External_Id__c',
                    'analyticsdemo_batch_id__c'])

def run(batch_id, source_file_name, output_file_name, source_pricebook,
        source_work_orders):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    pricebook = data_gen.load_dataset("Pricebook", source_pricebook,
                                      ['Id', 'External_Id__c']).dict(
                                          'Id', 'External_Id__c')
    data_gen.add_map_column('PricebookEntry.External_Id__c',
                            'PricebookEntryId', pricebook)

    work_orders = data_gen.load_dataset("WorkOrders", source_work_orders,
                                        ['Id', 'External_ID__c']).dict(
                                            'Id', 'External_ID__c')
    data_gen.add_map_column('WorkOrder.External_Id__c', 'WorkOrderId',
                            work_orders)

    data_gen.apply_transformations()

    data_gen.write(output_file_name, columns=[
        'External_ID__c',
        'PricebookEntry.External_Id__c',
        'WorkOrder.External_Id__c',
        'QuantityConsumed'
    ])

def run(batch_id, source_file_name, output_file_name, source_operating_hours):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    operating_hours = data_gen.load_dataset(
        "OperatingHours", source_operating_hours,
        ['Id', 'External_ID__c']).dict('Id', 'External_ID__c')
    data_gen.add_map_column('OperatingHours.External_Id__c',
                            'OperatingHoursId', operating_hours)

    data_gen.apply_transformations()

    data_gen.write(output_file_name, columns=[
        'External_ID__c',
        'Name',
        'OperatingHours.External_Id__c',
        'State',
        'IsActive',
        'Country',
        'City'
    ])

def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'KnowledgeArticle.External_Id__c', 'User.External_Id__c',
        'CreatedDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('KnowledgeArticle.External_Id__c',
                           'Parent.External_Id__c')
    data_gen.rename_column('User.External_Id__c', 'Owner.External_Id__c')

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_KCSArticle_ViewStat.' + str(data_gen.current_row + 1))

    channels = ['App', 'Desktop Site', 'Mobile Site']
    data_gen.add_formula_column('Channel__c', channels)

    data_gen.add_formula_column('ViewCount__c', formula=lambda: randint(1, 100))
    data_gen.add_formula_column('NormalizedScore__c',
                                formula=lambda: round(uniform(1, 10), 3))

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c', 'Channel__c', 'Parent.External_Id__c',
        'ViewCount__c', 'NormalizedScore__c', 'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)

def run(input_path, output_path, config_source):
    configs = json.loads(file_to_string(config_source))
    time_shifting_file = configs.get('timeShiftingPivot').get('fileName')
    time_shifting_field = configs.get('timeShiftingPivot').get('fieldName')

    data_gen = DataGenerator()
    data_gen.load_source_file(input_path + time_shifting_file,
                              time_shifting_field)

    aux_date = max([x[0] for x in data_gen.rows])
    delta_to_increase = (today -
                         datetime.strptime(aux_date, "%Y-%m-%d").date()).days

    def aux_date_formula(dateToShift):
        def date_formula(column_values):
            if column_values[dateToShift] != "":
                create_date = dateutil.parser.parse(column_values[dateToShift])
                # keep the original granularity of the value being shifted
                if len(column_values[dateToShift]) == 19:
                    return (create_date + timedelta(days=delta_to_increase)
                            ).strftime('%Y-%m-%d %H:%M:%S')
                elif len(column_values[dateToShift]) < 24:
                    return (create_date +
                            timedelta(days=delta_to_increase)).strftime('%Y-%m-%d')
                else:
                    return (create_date + timedelta(days=delta_to_increase)
                            ).strftime('%Y-%m-%dT%H:%M:%S.000Z')
            # leave empty values empty
            return column_values[dateToShift]

        data_gen.add_formula_column(dateToShift, date_formula)

    if not output_path:
        output_path = 'output/'

    for input_file in configs.get('inputFiles'):
        file_name = input_file.get('fileName')
        date_fields = input_file.get('dateFields', [])
        data_gen.load_source_file(input_path + file_name)
        for dateToShift in date_fields:
            aux_date_formula(dateToShift)
        data_gen.apply_transformations()
        data_gen.write(output_path + file_name)

def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c']
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    data_gen.rename_column('Owner.External_Id__c', 'User.External_Id__c')

    # todo one case article per case? at most 1? distribution?
    data_gen.duplicate_rows(
        duplication_factor=lambda: choice([0, 1], p=[.75, .25]))

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_CaseArticle.' + str(data_gen.current_row + 1))
    data_gen.add_formula_column(
        'KnowledgeArticle.External_Id__c',
        formula=lambda: 'W_KCSArticle.' + str(data_gen.current_row + 1))

    data_gen.add_constant_column('ArticleVersionNumber__c', 1)
    # pick randomly from the list, mirroring the PublishStatus__c pattern used
    # in the article-version generator
    data_gen.add_formula_column('IsSharedByEmail__c', ['true', 'false'])

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c', 'User.External_Id__c', 'ArticleVersionNumber__c',
        'CreatedDate__c', 'KnowledgeArticle.External_Id__c',
        'IsSharedByEmail__c', 'Case.External_Id__c',
        'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)

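# The duplication_factor above draws 0 with probability .75 and 1 with
# probability .25, so roughly one case in four gets a linked KCS article
# (assuming duplicate_rows drops a row when its factor is 0).
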
def update(source_file_name, output_file_name, source_work_orders):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    work_orders = data_gen.load_dataset("WorkOrders", source_work_orders,
                                        ['External_ID__c']).dict(
                                            'External_ID__c', 'External_ID__c')
    # rows without a matching work order map to the sentinel 'None', which the
    # startswith filter below then drops
    work_orders[None] = 'None'
    data_gen.add_map_column('WorkOrder.External_Id__c',
                            'WorkOrder.External_Id__c', work_orders)
    data_gen.apply_transformations()

    data_gen.filter(lambda cv: cv['WorkOrder.External_Id__c'].startswith('WO.'))
    data_gen.apply_transformations()

    data_gen.write(output_file_name, columns=[
        'External_ID__c',
        'PricebookEntry.External_Id__c',
        'WorkOrder.External_Id__c',
        'QuantityConsumed'
    ])

def run(batch_id, source_file_name, output_file_name, source_cases,
        source_accounts, source_work_types, source_service_appointments,
        reference_datetime=today_datetime):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    cases = data_gen.load_dataset("Cases", source_cases,
                                  ['Id', 'External_ID__c']).dict(
                                      'Id', 'External_ID__c')
    data_gen.add_map_column('Case.External_Id__c', 'CaseId', cases)

    accounts = data_gen.load_dataset("Accounts", source_accounts,
                                     ['Id', 'External_ID__c']).dict(
                                         'Id', 'External_ID__c')
    data_gen.add_map_column('Account.External_Id__c', 'AccountId', accounts)

    work_types = data_gen.load_dataset("WorkTypes", source_work_types,
                                       ['Id', 'External_ID__c']).dict(
                                           'Id', 'External_ID__c')
    data_gen.add_map_column('WorkType.External_Id__c', 'WorkTypeId', work_types)

    data_gen.add_constant_column('Pricebook2.Name', 'Standard Price Book')

    service_appointment_dates = data_gen.load_dataset(
        "ServiceAppointmentDates", source_service_appointments,
        ['WorkOrder.External_Id__c', 'CreatedDate__c']).dict(
            'WorkOrder.External_Id__c', 'CreatedDate__c')
    # work orders without a service appointment default to yesterday
    service_appointment_dates[None] = reference_datetime + timedelta(days=-1)
    data_gen.add_map_column('CreatedDate__c', 'External_ID__c',
                            service_appointment_dates)

    data_gen.apply_transformations()

    data_gen.filter(lambda cv: cv['WorkType.External_Id__c'].startswith('WT.'))
    data_gen.apply_transformations()

    data_gen.write(output_file_name, columns=[
        'External_ID__c',
        'CreatedDate__c',
        'Status',
        'Pricebook2.Name',
        'Priority',
        'Case.External_Id__c',
        'Account.External_Id__c',
        'WorkType.External_Id__c'
    ])

def updateCreatedDate(source_file_name, output_file_name,
                      source_service_appointments,
                      reference_datetime=today_datetime):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    service_appointments = data_gen.load_dataset(
        "ServiceAppointments", source_service_appointments,
        ['External_ID__c']).dict('External_ID__c', 'External_ID__c')
    # rows without a matching appointment map to the sentinel 'None', which
    # the startswith filter below then drops
    service_appointments[None] = 'None'
    data_gen.add_map_column('ServiceAppointment.External_Id__c',
                            'ServiceAppointment.External_Id__c',
                            service_appointments)
    data_gen.apply_transformations()

    data_gen.filter(lambda cv: cv['ServiceAppointment.External_Id__c'].
                    startswith('ServiceAppointment'))
    data_gen.apply_transformations()

    service_appointment_dates = data_gen.load_dataset(
        "ServiceAppointmentDates", source_service_appointments,
        ['External_ID__c', 'CreatedDate__c']).dict('External_ID__c',
                                                   'CreatedDate__c')
    # appointments without a created date default to yesterday
    service_appointment_dates[None] = reference_datetime + timedelta(days=-1)
    data_gen.add_map_column('CreatedDate__c',
                            'ServiceAppointment.External_Id__c',
                            service_appointment_dates)
    data_gen.apply_transformations()

    data_gen.write(output_file_name, columns=[
        'External_ID__c',
        'ServiceResource.External_Id__c',
        'ServiceAppointment.External_Id__c',
        'CreatedDate__c',
        'ActualTravelTime',
        'EstimatedTravelTime'
    ])

def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c',
        'EndTime__c', 'EndedBy__c', 'Status__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('Owner.External_Id__c', 'Agent.External_Id__c')
    data_gen.add_copy_column('LiveChatTranscript.External_Id__c',
                             'External_Id__c')
    data_gen.add_copy_column('Time__c', 'CreatedDate__c')
    data_gen.add_constant_column('Type__c', '')
    data_gen.add_constant_column('Detail__c', '')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    type_detail_map = {
        "ChatRequest": "Visitor requested chat.",
        "ChoiceRoute": "Choice chat request routed to all available qualified agents.",
        "CancelNoAgent": "Chat request canceled because no qualifying agents were available.",
        "Accept": "Chat request accepted by agent.",
        "CancelVisitor": "Visitor clicked Cancel Chat.",
        "LeaveAgent": "Agent left chat.",
        "EndAgent": "Agent clicked End Chat.",
        "LeaveVisitor": "Visitor left chat.",
        "EndVisitor": "Visitor clicked End Chat."
    }

    current_count = 1
    new_rows = []
    row_count = len(data_gen.rows)
    for i in range(row_count):
        row = data_gen.rows.pop()
        column_values = data_gen.row_to_column_values(row)

        live_chat = column_values['LiveChatTranscript.External_Id__c']
        agent = column_values['Agent.External_Id__c']
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        end_date = dateutil.parser.parse(column_values['EndTime__c'])
        ended_by = column_values['EndedBy__c']
        status = column_values['Status__c']

        def append_event(type__c, event_date):
            # build one transcript event row and advance the id counter
            nonlocal current_count
            new_rows.append(data_gen.column_values_to_row({
                'External_Id__c':
                    'W_LiveChatTranscriptEvent.' + str(current_count),
                'LiveChatTranscript.External_Id__c': live_chat,
                'Agent.External_Id__c': agent,
                'CreatedDate__c': event_date.isoformat(sep=' '),
                'Time__c': event_date.isoformat(sep=' '),
                'Type__c': type__c,
                'Detail__c': type_detail_map[type__c],
                'analyticsdemo_batch_id__c': batch_id
            }))
            current_count += 1

        # initialize chat request
        append_event('ChatRequest', create_date)

        if status == 'Missed':
            type__c = choice(['CancelVisitor', 'CancelNoAgent'])
            if type__c == 'CancelNoAgent':
                # no agents
                create_date = fake.date_time_between_dates(create_date, end_date)
                append_event('ChoiceRoute', create_date)
                create_date = fake.date_time_between_dates(create_date, end_date)
                append_event(type__c, create_date)
                append_event(choice(['LeaveVisitor', 'EndVisitor']), end_date)
            else:
                # visitor canceled
                append_event(type__c, end_date)
        else:
            append_event('ChoiceRoute', create_date)
            create_date = fake.date_time_between_dates(create_date, end_date)
            append_event('Accept', create_date)
            if ended_by == 'Visitor':
                append_event(choice(['LeaveVisitor', 'EndVisitor']), end_date)
            else:
                append_event(choice(['LeaveAgent', 'EndAgent']), end_date)

    data_gen.rows = new_rows

    # apply transformations and write file
    output_columns = [
        'External_Id__c', 'LiveChatTranscript.External_Id__c',
        'Agent.External_Id__c', 'Type__c', 'Detail__c', 'CreatedDate__c',
        'Time__c', 'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)

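# Event sequences generated above, per transcript:
#
#   Missed, no agent available:  ChatRequest -> ChoiceRoute -> CancelNoAgent -> Leave/EndVisitor
#   Missed, visitor canceled:    ChatRequest -> CancelVisitor
#   Handled, ended by visitor:   ChatRequest -> ChoiceRoute -> Accept -> Leave/EndVisitor
#   Handled, ended by agent:     ChatRequest -> ChoiceRoute -> Accept -> Leave/EndAgent
#
# Intermediate timestamps are drawn between the transcript's CreatedDate__c
# and EndTime__c; the closing event always lands on EndTime__c.
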
def run(batch_id, source_file_name, output_file_name,
        reference_date=today_datetime):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'StageName', 'Amount', 'ForecastCategory',
        'CloseDate', 'CreatedDate__c', 'SalesStageCount__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_columns({
        'StageName': 'StageName__c',
        'Amount': 'Amount__c',
        'ForecastCategory': 'ForecastCategory__c',
        'CloseDate': 'CloseDate__c'
    })
    data_gen.add_copy_column('Opportunity.External_Id__c', 'External_Id__c')

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    data_gen.apply_transformations()

    stages = ['Qualification', 'Discovery', 'Proposal/Quote', 'Negotiation']
    forecast_categories = ['BestCase', 'Pipeline', 'Commit']

    pipe_bucket = [
        'No Change', 'Reopen', 'Expand', 'Reduce', 'Moved Out', 'Moved In',
        'Stage Change'
    ]
    pipe_bucket_ratio = [0.10, 0.05, 0.15, 0.15, 0.30, 0.10, 0.15]

    qualification_pipe_bucket = [
        'No Change', 'Reopen', 'Expand', 'Reduce', 'Moved Out', 'Moved In'
    ]
    qualification_pipe_bucket_ratio = [0.20, 0.05, 0.20, 0.10, 0.35, 0.10]

    zero_amount_pipe_bucket = [
        'No Change', 'Reopen', 'Moved Out', 'Moved In', 'Stage Change'
    ]
    zero_amount_pipe_bucket_ratio = [0.20, 0.05, 0.35, 0.10, 0.30]

    current_count = 1
    new_rows = []
    row_count = len(data_gen.rows)
    for i in range(row_count):
        row = data_gen.rows.pop()
        column_values = data_gen.row_to_column_values(row)

        opportunity_id = column_values['Opportunity.External_Id__c']
        close_date = dateutil.parser.parse(column_values['CloseDate__c'])
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        final_amount = int(column_values['Amount__c'])
        final_forecast_category = column_values['ForecastCategory__c']
        final_stage_name = column_values['StageName__c']
        stage_count = int(column_values['SalesStageCount__c'])

        # initialize most recent event date to reference_date or earlier
        event_date_range_start = create_date + (close_date - create_date) / 2
        event_date_range_end = close_date
        if close_date > reference_date:
            event_date_range_end = reference_date
            event_date_range_start = create_date + (reference_date - create_date) / 2

        # ensure event happens on or after opportunity create_date
        event_date = fake.date_time_between_dates(event_date_range_start,
                                                  event_date_range_end)

        # create final state
        column_values['CreatedDate__c'] = event_date
        column_values['External_Id__c'] = 'W_OpportunityHistory.' + str(current_count)
        current_count += 1
        new_rows.append(data_gen.column_values_to_row(column_values))

        next_create_date = event_date
        next_stage_name = final_stage_name
        next_forecast_category = final_forecast_category
        next_close_date = close_date
        next_amount = final_amount

        movedOut = False
        movedIn = False
        expand = False
        reduce = False
        reopen = False
        initialized = False

        # generate events in reverse order until create_date
        for current_stage_count in range(stage_count):
            # choose the proper bucket depending on the scenario
            bucket = pipe_bucket
            ratio = pipe_bucket_ratio
            if next_amount <= 0:
                bucket = zero_amount_pipe_bucket
                ratio = zero_amount_pipe_bucket_ratio
            elif next_stage_name == 'Qualification':
                bucket = qualification_pipe_bucket
                ratio = qualification_pipe_bucket_ratio

            event = choice(bucket, p=ratio)

            event_date_range_end = event_date
            event_date_range_start = create_date + (event_date - create_date) / 2
            event_date = fake.date_time_between_dates(event_date_range_start,
                                                      event_date_range_end)

            # if next stage is closed, make the previous event a stage change
            if 'Closed' in next_stage_name:
                event = 'Stage Change'

            # if the event date is the create date, create the initial state
            if current_stage_count == stage_count - 1:
                event_date = create_date
                event = 'Initial State'

            if event != 'No Change':
                curr_close_date = next_close_date
                curr_amount = next_amount
                curr_stage_name = next_stage_name
                curr_forecast_category = next_forecast_category

                if event == 'Reopen' and not reopen:
                    curr_stage_name = 'Closed Lost'
                    curr_forecast_category = 'Omitted'
                    reopen = True
                elif event == 'Initial State':
                    curr_stage_name = 'Qualification'
                    curr_forecast_category = 'Pipeline'
                    initialized = True
                elif event == 'Expand' and not expand:
                    curr_amount = next_amount - int(uniform(.15, .45) * final_amount)
                    if curr_amount <= 0:
                        # reduce instead
                        curr_amount = next_amount + int(uniform(.15, .45) * final_amount)
                    expand = True
                elif event == 'Reduce' and not reduce:
                    curr_amount = next_amount + int(uniform(.15, .45) * final_amount)
                    reduce = True
                elif event == 'Moved In' and not movedIn:
                    curr_close_date = curr_close_date + timedelta(days=randint(0, 30))
                    movedIn = True
                elif event == 'Moved Out' and not movedOut:
                    curr_close_date = curr_close_date - timedelta(days=randint(30, 90))
                    movedOut = True
                elif event == 'Stage Change':
                    # if next stage is not closed, use previous stage
                    if ('Closed' not in next_stage_name and
                            stages.index(next_stage_name) - 1 > 0):
                        curr_stage_name = stages[stages.index(next_stage_name) - 1]
                    # if next stage is closed, use any stage
                    elif 'Closed' in next_stage_name:
                        curr_stage_name = stages[randint(1, len(stages) - 1)]
                    else:
                        curr_stage_name = stages[0]
                    curr_forecast_category = forecast_categories[
                        randint(0, len(forecast_categories) - 1)]

                new_column_values = {
                    'External_Id__c': 'W_OpportunityHistory.' + str(current_count),
                    'Opportunity.External_Id__c': opportunity_id,
                    'StageName__c': curr_stage_name,
                    'Amount__c': curr_amount,
                    'ForecastCategory__c': curr_forecast_category,
                    'CreatedDate__c': event_date.isoformat(sep=' '),
                    'CloseDate__c': curr_close_date.date().isoformat(),
                    'analyticsdemo_batch_id__c': batch_id
                }
                current_count += 1
                new_rows.append(data_gen.column_values_to_row(new_column_values))

                next_stage_name = curr_stage_name
                next_forecast_category = curr_forecast_category
                next_close_date = curr_close_date
                next_amount = curr_amount

    data_gen.rows = new_rows
    # rows were generated newest-first; restore chronological order
    data_gen.reverse()

    data_gen.write(output_file_name, [
        'External_Id__c', 'Amount__c', 'StageName__c', 'ForecastCategory__c',
        'CloseDate__c', 'CreatedDate__c', 'Opportunity.External_Id__c',
        'analyticsdemo_batch_id__c'
    ])

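# The history above is generated newest-first and then reversed: the final
# snapshot is written at a random event date, and SalesStageCount__c earlier
# events are derived from it by undoing one change at a time. For example, if
# the final Amount__c is 100000 and an 'Expand' event is drawn, the preceding
# row's Amount__c becomes 100000 minus uniform(15%, 45%) of the final amount,
# i.e. somewhere in [55000, 85000]. The oldest event is forced onto the
# opportunity's CreatedDate__c with stage 'Qualification' and forecast
# category 'Pipeline'.
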
def run(batch_id, source_file_name, output_file_name,
        reference_datetime=today_datetime):
    # note: reference_datetime must be a datetime, not a date; it is compared
    # with parsed datetimes below
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    data_gen.rename_column('External_Id__c', 'Case.External_Id__c')
    data_gen.rename_column('Owner.External_Id__c', 'User.External_Id__c')

    data_gen.duplicate_rows(duplication_factor=lambda: choice(
        [1, 2, 3, 4, 5], p=[.65, .15, .10, .05, .05]))

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_AgentWork.' + str(data_gen.current_row + 1))

    data_gen.add_copy_column('RequestDateTime__c', 'CreatedDate__c')

    def created_date_formula(column_values):
        created_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        closed_date = dateutil.parser.parse(column_values['ClosedDate__c'])
        if closed_date > reference_datetime:
            closed_date = reference_datetime
        mid_date = created_date + (closed_date - created_date) / 2
        return fake.date_time_between_dates(created_date,
                                            mid_date).isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', created_date_formula)

    def assigned_date_formula(column_values):
        created_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        return (created_date +
                timedelta(seconds=randint(0, 120))).isoformat(sep=' ')

    data_gen.add_formula_column('AssignedDateTime__c', assigned_date_formula)

    def accept_date_formula(column_values):
        assigned_date = dateutil.parser.parse(
            column_values['AssignedDateTime__c'])
        return (assigned_date +
                timedelta(seconds=randint(30, 600))).isoformat(sep=' ')

    data_gen.add_formula_column('AcceptDateTime__c', accept_date_formula)

    def close_date_formula(column_values):
        accept_date = dateutil.parser.parse(column_values['AcceptDateTime__c'])
        return (accept_date +
                timedelta(seconds=randint(30, 1800))).isoformat(sep=' ')

    data_gen.add_formula_column('CloseDateTime__c', close_date_formula)

    def active_time_formula(column_values):
        accept_date = dateutil.parser.parse(column_values['AcceptDateTime__c'])
        close_date = dateutil.parser.parse(column_values['CloseDateTime__c'])
        return int((close_date - accept_date).total_seconds())

    data_gen.add_formula_column('ActiveTime__c', active_time_formula)

    data_gen.add_formula_column('AgentCapacityWhenDeclined__c',
                                lambda: randint(30, 1800))

    def cancel_date_formula(column_values):
        assigned_date = dateutil.parser.parse(
            column_values['AssignedDateTime__c'])
        return (assigned_date +
                timedelta(seconds=randint(30, 600))).isoformat(sep=' ')

    data_gen.add_formula_column('CancelDateTime__c', cancel_date_formula)

    data_gen.add_formula_column('CapacityPercentage__c', lambda: randint(1, 101))
    data_gen.add_formula_column('CapacityWeight__c', lambda: randint(1, 7))

    def decline_date_formula(column_values):
        assigned_date = dateutil.parser.parse(
            column_values['AssignedDateTime__c'])
        return (assigned_date +
                timedelta(seconds=randint(30, 600))).isoformat(sep=' ')

    data_gen.add_formula_column('DeclineDateTime__c', decline_date_formula)
    data_gen.add_formula_column('DeclineReason__c', formula=fake.sentence)
    data_gen.add_copy_column('HandleTime__c', 'ActiveTime__c')

    data_gen.add_formula_column('OriginalQueue.DeveloperName', [
        'GeneralQueue', 'InternationalQueue', 'Knowledge_Translations',
        'Social_Queue', 'TargetCampaign', 'Tier1Queue', 'Tier2Queue',
        'Tier3Queue'
    ])

    data_gen.add_formula_column('PushTimeout__c', lambda: randint(0, 100))

    def push_timeout_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        return create_date + timedelta(seconds=column_values['PushTimeout__c'])

    data_gen.add_formula_column('PushTimeoutDateTime__c',
                                push_timeout_date_formula)

    data_gen.add_formula_column(
        'ServiceChannel.DeveloperName',
        ['Cases', 'LiveMessage', 'sfdc_liveagent', 'Leads'])

    def speed_to_answer_formula(column_values):
        request_date = dateutil.parser.parse(
            column_values['RequestDateTime__c'])
        accept_date = dateutil.parser.parse(column_values['AcceptDateTime__c'])
        return int((accept_date - request_date).total_seconds())

    data_gen.add_formula_column('SpeedToAnswer__c', speed_to_answer_formula)

    data_gen.add_formula_column('Status__c', [
        'Assigned', 'Unavailable', 'Declined', 'Opened', 'Closed',
        'DeclinedOnPushTimeout', 'Canceled'
    ])

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    def filter_func(column_values):
        # keep only chat-origin cases created within the last 60 days
        created_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        cutoff_date = reference_datetime - timedelta(days=60)
        return column_values['Origin'] == 'Chat' and created_date >= cutoff_date

    data_gen.filter(filter_function=filter_func)

    data_gen.apply_transformations()
    data_gen.sort_by('RequestDateTime__c')

    output_columns = [
        'External_Id__c', 'RequestDateTime__c', 'CreatedDate__c',
        'AssignedDateTime__c', 'AcceptDateTime__c', 'CloseDateTime__c',
        'ActiveTime__c', 'AgentCapacityWhenDeclined__c', 'CancelDateTime__c',
        'CapacityPercentage__c', 'CapacityWeight__c', 'DeclineDateTime__c',
        'DeclineReason__c', 'HandleTime__c', 'OriginalQueue.DeveloperName',
        'PushTimeout__c', 'PushTimeoutDateTime__c',
        'ServiceChannel.DeveloperName', 'SpeedToAnswer__c', 'Status__c',
        'User.External_Id__c', 'Case.External_Id__c',
        'analyticsdemo_batch_id__c'
    ]
    return data_gen.write(output_file_name, output_columns, 6000)

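# Note on the run above: only chat-origin cases created in the 60 days before
# reference_datetime survive filter_func, and the trailing 6000 passed to
# write() presumably caps the number of rows per output file.
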
def run(input_path, output_path, config_source):
    configs = json.loads(file_to_string(config_source))
    time_shifting_file = configs.get('timeShiftingPivot').get('fileName')
    time_shifting_field = configs.get('timeShiftingPivot').get('fieldName')

    data_gen = DataGenerator()
    data_gen.load_source_file(input_path + time_shifting_file,
                              time_shifting_field)

    aux_date = max([x[0] for x in data_gen.rows])[:10]
    delta_to_increase = (today -
                         datetime.strptime(aux_date, "%Y-%m-%d").date()).days

    def aux_date_formula(dateToShift):
        def date_formula(column_values):
            if column_values[dateToShift] != "":
                create_date = dateutil.parser.parse(column_values[dateToShift])
                # keep the original granularity of the value being shifted
                if len(column_values[dateToShift]) == 19:
                    return (create_date + timedelta(days=delta_to_increase)
                            ).strftime('%Y-%m-%d %H:%M:%S')
                elif len(column_values[dateToShift]) < 24:
                    return (create_date +
                            timedelta(days=delta_to_increase)).strftime('%Y-%m-%d')
                else:
                    return (create_date + timedelta(days=delta_to_increase)
                            ).strftime('%Y-%m-%dT%H:%M:%S.000Z')
            # leave empty values empty
            return column_values[dateToShift]

        data_gen.add_formula_column(dateToShift, date_formula)

    current_year = today.year
    map_quota_year = {}

    def quotas_date_formula(dateToShift):
        def date_formula(column_values):
            if column_values[dateToShift] != "":
                quota_year = column_values[dateToShift][:4]
                return column_values[dateToShift].replace(
                    quota_year, map_quota_year[quota_year])
            # leave empty values empty
            return column_values[dateToShift]

        # map the newest year in the file to the current year and the oldest
        # to the previous year
        date_index = data_gen.column_names[dateToShift]
        dates = [e[date_index] for e in data_gen.rows]
        max_year = max(dates)[:4]
        min_year = min(dates)[:4]
        map_quota_year[max_year] = str(current_year)
        map_quota_year[min_year] = str(current_year - 1)
        data_gen.add_formula_column(dateToShift, date_formula)

    if not output_path:
        output_path = 'output/'

    for input_file in configs.get('inputFiles'):
        file_name = input_file.get('fileName')
        date_fields = input_file.get('dateFields', [])
        print("Time-shifting", file_name, "...")
        data_gen.load_source_file(input_path + file_name)
        if file_name not in [
                'FscDemoWeeks.csv', 'WM_Add_Assets_Prediction_Final.csv',
                'WM_Churn_Predictions_Final.csv'
        ]:
            data_gen.add_constant_column('LastProcessedDate', today.isoformat())
        for dateToShift in date_fields:
            if file_name != 'FscDemoQuota.csv':
                aux_date_formula(dateToShift)
            else:
                quotas_date_formula(dateToShift)
        data_gen.apply_transformations()
        data_gen.write(output_path + file_name)

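# Worked example of quotas_date_formula above: FscDemoQuota.csv covers exactly
# two years, so instead of day-shifting, whole years are remapped. If the
# file's dates span 2017-2018 and today falls in 2021, map_quota_year becomes
# {'2018': '2021', '2017': '2020'}, and '2018-03-01' is rewritten to
# '2021-03-01'.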