def main(sessions, do_fix=False):
    """Interactively load a data file from ./resources/sf_update_files and
    insert/update/upsert its records into every Salesforce session given.

    Metadata (object description, org description) is queried from the first
    session only and assumed identical across all sessions.

    :param sessions: list of Salesforce session objects; the first is used
        for all metadata lookups, and the operation runs against every one.
    :param do_fix: unused; kept for backward compatibility with callers.
    """
    first_session = sessions[0]
    for s in sessions:
        s.print_messages = False
    session = first_session

    # List loadable files (skip directories, macOS artifacts, Office lock files),
    # newest first by modification time.
    files_list = [
        item for item in listdir("./resources/sf_update_files")
        if isdir("./resources/sf_update_files/" + item) == False
        and item != ".DS_Store" and not item.startswith("~$")
    ]
    files_list = sorted(
        files_list,
        key=lambda item: -(path.getmtime("./resources/sf_update_files/" + item)))
    selected_file = prompt('\nThe following update files are available:', files_list)
    rows = []
    data_sheet_name = None

    # Settings defaults; an xlsx "Settings" sheet may override any of these.
    settings = ObjDict({
        "BATCH_SIZE": 2000,
        "OPERATION": None,
        "DO_UPSERT": None,  # Deprecated... use OPERATION instead
        "BYPASS_AUTOMATION": None,
        "EXT_ID": None,
        "PARALLEL_CONCURRENCY": True,
        "BULK_API_MODE": True
    })

    # Get data and settings from file
    if selected_file.endswith(".csv"):
        with open("./resources/sf_update_files/" + selected_file, 'r',
                  encoding='utf-8-sig') as file:
            reader = csv.DictReader(file)
            for row in reader:
                rows.append(row)
    elif selected_file.endswith(".xlsx"):
        file_path = './resources/sf_update_files/{}'.format(selected_file)
        xlsx_file = pd.ExcelFile(file_path)
        sheets = xlsx_file.sheet_names
        data_sheet_name = sheets[0]
        datadf = pd.read_excel(xlsx_file, data_sheet_name)
        settingsdf = pd.read_excel(
            xlsx_file, 'Settings') if 'Settings' in sheets else None
        # Fill nulls with blank strings
        datadf = datadf.fillna(value='')
        # Set column names to strings
        datadf.columns = datadf.columns.astype(str)
        # Set timestamp columns to string
        for col in datadf.select_dtypes(include=['datetime64']).columns.values:
            datadf[col] = datadf[col].astype(str)
        # Set numeric columns to zero-trimmed string.
        # BUG FIX: the original used .replace('.0', '', regex=False), which
        # removes ".0" ANYWHERE in the value ("10.05" -> "105"); anchor the
        # pattern so only a trailing ".0" is stripped.
        for col in datadf.select_dtypes(
                include=['int64', 'float64']).columns.values:
            datadf[col] = datadf[col].astype(float).astype(str)
            datadf[col] = datadf[col].str.replace(r'\.0$', '', regex=True)
        rows = datadf.to_dict('records')
        if settingsdf is not None:
            inputsettings = settingsdf.set_index('Field').to_dict('index')
            inputsettings = {
                key: val['Value'] for key, val in inputsettings.items()
            }
            settings.update(inputsettings)
    else:
        print("No file")

    if not rows:
        raise ValueError("Selected file contains no data rows.")

    # Handle for deprecated DO_UPSERT setting
    if settings.DO_UPSERT is True and settings.OPERATION is None:
        settings.OPERATION = 'UPSERT'
    operation = str(
        settings.OPERATION).lower() if settings.OPERATION is not None else None

    # Try to detect Object name from record Ids in file.
    # If no record Ids are present, try to use the name of the tab in the file
    # we are loading; if no match is found, prompt the user for the object name.
    rows_with_id = [r for r in rows if "Id" in r and r["Id"] != ""]
    source_field_names = set(rows[0].keys())
    if len(rows_with_id) > 0:
        object_name = session.get_object_name(rows_with_id[0]["Id"])
    else:
        all_object_names = [
            item["name"] for item in session.get_org_description()["sobjects"]
        ]
        object_names_in_file_name = [
            item for item in all_object_names
            if " " + item + " " in selected_file
        ]
        if data_sheet_name in all_object_names:
            object_name = data_sheet_name
        elif len(object_names_in_file_name) == 1:
            object_name = object_names_in_file_name[0]
        else:
            object_name = prompt(
                "\nWhat object are the records in this file for?")

    object_desc = session.get_object_description(object_name)

    # BUG FIX: these were only bound inside the upsert-detection branch below,
    # causing NameError later when OPERATION was preset to insert/update.
    upsert_matches = []
    reference_upsert_matches = []
    self_external_id = None

    if operation is None or operation == 'upsert':
        # Relationship prefixes present in the file, e.g. "Account." from "Account.Ext__c".
        source_fields_relationship_names = {
            f[0:f.find('.')] for f in source_field_names if '.' in f
        }
        # External ID fields of the target object that appear in the file.
        upsert_matches = [{
            "field": item["name"]
        } for item in object_desc.fields
            if item["externalId"] == True and item["name"] in source_field_names]
        # Lookup fields whose relationship name matches a dotted column prefix;
        # pre-fetch referenced object descriptions on worker threads.
        possible_reference_upsert_matches = [
            {
                "referenceTo": item.referenceTo[0],
                "match_string": item.relationshipName + ".",
                "reference_object_descs": [
                    threading.new(session.get_object_description, r)
                    for r in item.referenceTo
                ]
            }
            for item in object_desc.fields
            if len(item.referenceTo) > 0
            and item.relationshipName in source_fields_relationship_names
        ]
        for field in source_field_names:
            for match in possible_reference_upsert_matches:
                if field.startswith(match["match_string"]):
                    upsert_match_object_desc = session.get_object_description(
                        match["referenceTo"])
                    reference_upsert_matches.extend(
                        [{
                            "field": field,
                            "matching_object": match["referenceTo"],
                            "matching_field": item["name"]
                        } for item in upsert_match_object_desc["fields"]
                            if item["externalId"] == True
                            and item["name"] == field[field.find(".") + 1:]])
        if len(upsert_matches) > 0:
            print(
                "\nFound the following External ID references for this object: {}"
                .format(", ".join([item["field"] for item in upsert_matches])))
        if len(reference_upsert_matches) > 0:
            print(
                "Found the following External ID references for a lookup object: {}"
                .format(", ".join(
                    [item["field"] for item in reference_upsert_matches])))
        if len(upsert_matches) + len(
                reference_upsert_matches) > 0 and settings.OPERATION is None:
            if prompt("\nWould you like to upsert?", boolean=True):
                operation = 'upsert'

    if operation is None:
        # BUG FIX: the original condition required "no rows have an Id" AND
        # "all rows have an Id" simultaneously — unsatisfiable for nonempty
        # data, which left `operation` as None and crashed downstream.
        can_insert = len(rows_with_id) == 0        # any pre-existing Id forbids insert
        can_update = len(rows_with_id) == len(rows)  # every row needs an Id for update
        if can_insert and can_update:
            operation = prompt("\nWhat operation would you like to perform?",
                               options={
                                   'Insert': 'insert',
                                   'Update': 'update'
                               })
        elif can_insert:
            operation = 'insert'
        elif can_update:
            operation = 'update'
        else:
            raise ValueError(
                "File mixes rows with and without Ids; cannot determine operation.")

    if operation == 'upsert':
        upsert_matches.insert(0, {"field": "Id"})
        self_external_id = settings.EXT_ID
        if self_external_id is None:
            if len(upsert_matches) == 1:
                self_external_id = upsert_matches[0]["field"]
            else:
                self_external_id = prompt(
                    "\nWhat ID field would you like to use for upsert?",
                    options=[item['field'] for item in upsert_matches])
        if len([
            item for item in upsert_matches
            if item["field"] == self_external_id
        ]) == 0:
            # BUG FIX: the original printed a message then executed a bare
            # `raise` with no active exception, producing a RuntimeError.
            raise ValueError(
                "External ID field '{}' does not appear in the selected file name."
                .format(self_external_id))

    # Only writeable, non-formula, non-autonumber fields present in the file
    # are sent; everything else is reported as ignored.
    fields_to_update = [
        item["name"] for item in object_desc["fields"]
        if item["name"] in rows[0] and item["updateable"] == True
        and item["calculated"] == False and item["autoNumber"] == False
    ]
    fields_to_update.extend(
        [item["field"] for item in reference_upsert_matches])
    fields_to_ignore = [
        item for item in rows[0] if item not in fields_to_update
    ]
    upsert_match_fields = [mat["field"] for mat in upsert_matches]  # hoisted out of the row loop
    rows_to_update = [{
        f: v
        for (f, v) in r.items()
        if f == "Id" or f in fields_to_update or f in upsert_match_fields
    } for r in rows]

    mode = 'bulk' if settings.BULK_API_MODE == True else 'simple'
    if settings.BYPASS_AUTOMATION is None:
        settings.BYPASS_AUTOMATION = prompt(
            f"\nDo you need to bypass automation for this {operation}?",
            boolean=True)
    print("Selected file: {}".format(selected_file))
    print("Operation: {}".format(operation.title()) +
          (" (on {})".format(self_external_id
                             ) if self_external_id is not None else ""))
    print("Table: {}".format(object_name))
    print("Bypass automation: {}".format(settings.BYPASS_AUTOMATION))
    print("Fields to update: {}".format(", ".join(fields_to_update)))
    print("Fields to ignore: {}".format(", ".join(fields_to_ignore)))
    do_operation_confirmation = prompt(
        f"\nWill {operation} {len(rows)} records. Are you sure?", boolean=True)

    # Now that all settings have been determined, perform the
    # insert/update/upsert in ALL sessions that were passed into the process.
    # It is assumed that the system metadata that was queried for the 1st
    # session is the same in the other sessions.
    concurrency = "Parallel" if settings.PARALLEL_CONCURRENCY else "Serial"
    if do_operation_confirmation:
        def perform_crud_operation(session):
            if settings.BYPASS_AUTOMATION:
                session.add_bypass_settings()
            else:
                session.remove_bypass_settings()
            job_result = None  # BUG FIX: was unbound when no operation branch matched
            if operation == "insert":
                job_result = session.insert_records(object_name,
                                                    rows_to_update,
                                                    concurrency=concurrency)
            elif operation == "update":
                job_result = session.update_records(rows_to_update,
                                                    concurrency=concurrency)
            elif operation == "upsert":
                job_result = session.upsert_records(object_name,
                                                    rows_to_update,
                                                    self_external_id,
                                                    concurrency=concurrency,
                                                    mode=mode)
            if job_result is not None and "status" in job_result and job_result[
                    "status"]["numberRecordsFailed"] != "0":
                print("{} records failed.".format(
                    job_result["status"]["numberRecordsFailed"]))
            if settings.BYPASS_AUTOMATION:
                session.remove_bypass_settings()

        for sess in sessions:
            threading.new(perform_crud_operation, sess)
        threading.wait()
        print("\nOperation complete!")
    else:
        print("\nTerminated.")
def dataload_file(self, file_path):
    """Load a .csv or .xlsx data file into one or more Salesforce instances.

    Prompts the user for the target instances, object, operation, and bypass
    settings (any of which may instead come from an xlsx "Settings" sheet),
    optionally backs up the affected records, then runs the operation against
    every selected instance on worker threads.

    :param file_path: path to a .csv or .xlsx file of records to load.
    :returns: ObjDict keyed by instance name with 'backup' (DataFrame or None)
        and 'job' (operation result) per instance, or None if terminated.
    :raises Exception: on unsupported file extension or user termination.
    """
    object_name = None
    external_id_field_name = None
    # Settings defaults; an xlsx "Settings" sheet may override any of these.
    settings = ObjDict({
        "BATCH_SIZE": 2000,
        "OPERATION": None,
        "BYPASS_AUTOMATION": None,
        "CONCURRENCY": 'Parallel',
        "MODE": 'simple',
        "DO_BACKUP": True,
        "DISABLE_ACCOUNT_DUPLICATE_RULE": False,
    })
    if file_path.endswith('.csv'):
        df = pd.read_csv(file_path)
        data_sheet_name = None
    elif file_path.endswith('.xlsx'):
        xlsx = pd.ExcelFile(file_path)
        all_sheets = xlsx.sheet_names
        option_sheets = [s for s in all_sheets if s != 'Settings']
        data_sheet_name = option_sheets[0] if len(option_sheets) == 1 else prompt(
            'Which sheet should be loaded?', options=option_sheets)
        df = pd.read_excel(xlsx, data_sheet_name)
        df.columns = df.columns.astype(str)
        if 'Settings' in all_sheets:
            df_settings = pd.read_excel(xlsx, 'Settings')
            inputsettings = {
                key: val['Value']
                for key, val in df_settings.set_index('Field').to_dict('index').items()
            }
            settings.update(inputsettings)
            # Validate overrides from the sheet.
            assert type(settings.BATCH_SIZE) is int
            # BUG FIX: the original called settings.OPERATION.lower() directly,
            # which raises AttributeError when OPERATION is None (the default),
            # and compared the lowered string against None (never matches).
            assert settings.OPERATION is None or str(settings.OPERATION).lower() in (
                'insert', 'update', 'upsert', 'delete', 'undelete')
            # BUG FIX: None is the documented default and is handled below by
            # prompting; the original assert rejected it outright.
            assert settings.BYPASS_AUTOMATION in (True, False, None)
            assert settings.CONCURRENCY in ('Parallel', 'Serial')
            assert settings.MODE in ('simple', 'bulk')
    else:
        raise Exception('Can only parse files with a .csv or .xlsx extension')

    instances = prompt(
        'For which instance(s) should this file be loaded?',
        multiselect=True,
        options={
            (key[key.rindex('.') + 1:] +
             (' prod' if val['sandbox'] == 'False' else '')).strip(): val
            for key, val in self.sf.instance_credentials.items()
            if 'security_token' in val
        })
    sessions = [Salesforce_API(c) for c in instances]

    # Try to detect Object based on records in the file.
    # If no record Ids are present, try to use the name of the tab in the file.
    if 'Id' in df:
        id_list = df[['Id']].fillna('').query("Id != ''")['Id'].to_list()
        if len(id_list) > 0:
            try:
                object_name = sessions[0].get_object_name(id_list)
            except Exception:
                pass  # best-effort; fall through to sheet-name/prompt detection
    if object_name is None:
        all_object_names = {
            item.name for item in sessions[0].get_org_description().sobjects
        }
        if data_sheet_name is not None and data_sheet_name in all_object_names:
            object_name = data_sheet_name
        if object_name is None:
            object_name = prompt(
                "\nWhat object are the records in this file for?",
                expected=all_object_names)

    # Translate RecordType developer names into Ids when only the name column exists.
    if 'RecordType.Name' in df and 'RecordTypeId' not in df:
        df['RecordTypeId'] = df['RecordType.Name'].apply(
            lambda x: self.get_recordtype_map()[(object_name, x)].Id
            if (object_name, x) in self.get_recordtype_map() else None)

    # Describe the object on a worker thread while the user answers prompts.
    object_desc = threading.new(sessions[0].get_object_description, object_name)
    operation = str(settings.OPERATION).lower() if settings.OPERATION is not None else None
    if operation is None:
        all_records_have_id = 'Id' in df and len(df) == len(df.fillna('').query("Id != ''"))
        operation_options = [
            'insert',
            'upsert',
            'update' if all_records_have_id else None,
            'delete' if all_records_have_id else None,
            'undelete' if all_records_have_id else None,
        ]
        operation = prompt('What operation needs to be performed?',
                           options=[o for o in operation_options if o is not None])
    if operation == 'upsert':
        external_id_fields = [
            f.name for f in object_desc.result().fields
            if f.externalId is True and f.name in df.columns.values
        ]
        if len(external_id_fields) == 1:
            external_id_field_name = external_id_fields[0]
        elif len(external_id_fields) == 0:
            external_id_field_name = 'Id'
        else:
            external_id_field_name = prompt(
                'Which field would you like to use as the External ID?',
                options=external_id_fields)
    if 'RecordTypeId' not in df and operation in ('insert', 'upsert') and len(
            object_desc.result().recordTypeInfos) > 1:
        default_record_type = [
            item.name for item in object_desc.result().recordTypeInfos
            if item.defaultRecordTypeMapping is True
        ][0]
        if prompt(f'No RecordTypeId was specified. New records will be defaulted to RecordType = "{default_record_type}". Proceed?',
                  boolean=True) is False:
            raise Exception('Terminated - No RecordTypeId specified')
    if settings.DO_BACKUP and len(df) > 5000:
        if not prompt(f"Large record count: {len(df)} records. Do you want to run a backup? (Normally automatic)",
                      boolean=True):
            settings.DO_BACKUP = False

    valid_fields = sessions[0]._get_valid_fields_for_operation(
        operation, object_name,
        first_row=df.iloc[0].to_dict(),
        external_id_field_name=external_id_field_name)
    fields_to_update = [f for f in df.columns.values if f.lower() in valid_fields]
    fields_to_ignore = [f for f in df.columns.values if f.lower() not in valid_fields]

    custom_settings_options = {
        'Yes': True,
        'No': False,
        'Set Data Admin Only': {'dataadmin': True, 'automation': False, 'pbflow': False, 'fc': False, 'multicurrency': False},
        'Custom': self.sf.lndatautil._configure_GeneralSettings__c
    }
    settings.BYPASS_AUTOMATION = settings.BYPASS_AUTOMATION if settings.BYPASS_AUTOMATION is not None else prompt(
        f"\nDo you need to bypass automation for this {operation}?",
        options=custom_settings_options, call_options=True)

    # Confirmation loop: show the plan; 'M' lets the user modify settings and
    # re-display; 'Y'/'N' exits the loop.
    while True:
        external_id_lookups_text = ', '.join([f for f in fields_to_update if '.' in f])
        operation_message_end_text = f" on {external_id_field_name}" if operation == 'upsert' else ''
        operation_message_end_text += f" and External Id lookup(s) for: {external_id_lookups_text}" if len(external_id_lookups_text) > 0 else ''
        print(f"Selected file: {file_path[file_path.rindex('/')+1:]}")
        print(f"Operation: {operation.title()}{operation_message_end_text}")
        print(f"Table: {object_name}")
        print(f"Bypass Settings: {settings.BYPASS_AUTOMATION}")
        print(f"Job Settings: Mode: {settings.MODE}, Batch size: {settings.BATCH_SIZE}, Concurrency: {settings.CONCURRENCY}")
        print(f"Fields to update: {', '.join(fields_to_update)}")
        print(f"Fields to ignore: {', '.join(fields_to_ignore)}")
        operation_confirmation = prompt(
            f"\nWill {operation} {len(df)} records. Are you sure? Press 'M' to modify settings. (Y/N/M)",
            expected=['Y', 'N', 'M'])
        if operation_confirmation == 'M':
            options = {
                "BYPASS_AUTOMATION": {'options': custom_settings_options, 'call_options': True},
                "BATCH_SIZE": {'expected_type': int},
                "CONCURRENCY": {'expected': ['Parallel', 'Serial', '']},
                "MODE": {'expected': ['simple', 'bulk', '']},
                "DISABLE_ACCOUNT_DUPLICATE_RULE": {'options': [True, False]},
            }
            while True:
                modify_options = ['All'] + list(options.keys()) + ['Done']
                selected_option = prompt('Modify:', modify_options)
                if selected_option == 'All':
                    for option, val in options.items():
                        new = prompt(f'Set setting {option} (currently {settings[option]}): ', **val)
                        # Blank input keeps the current value.
                        settings[option] = new if new != '' else settings[option]
                    break
                elif selected_option == 'Done':
                    break
                else:
                    option, val = selected_option, options[selected_option]
                    new = prompt(f'Set setting {option} (currently {settings[option]}): ', **val)
                    settings[option] = new if new != '' else settings[option]
            continue
        break

    # Now that all settings have been determined, perform the operation in ALL
    # sessions. It is assumed that the system metadata queried from the 1st
    # session is the same in the other sessions.
    if operation_confirmation == 'Y':
        def perform_operation(sf):
            # Run one instance's job; applies/removes bypass settings around it.
            # BUG FIX: the original exclusion used `key not in ('BYPASS_AUTOMATION')`
            # — a substring test against a plain string, not tuple membership.
            params = {key.lower(): val for key, val in settings.items()
                      if key not in {'BYPASS_AUTOMATION'}}
            if settings.BYPASS_AUTOMATION is True:
                sf.add_bypass_settings()
            elif settings.BYPASS_AUTOMATION is False:
                sf.remove_bypass_settings()
            else:
                # A dict of custom bypass flags.
                sf.add_bypass_settings(**settings.BYPASS_AUTOMATION)
            if settings.DISABLE_ACCOUNT_DUPLICATE_RULE is True:
                self.toggle_duplicate_rule('Account.', False)
            job_result = None  # BUG FIX: was unbound if no branch matched
            if operation == "insert":
                job_result = sf.insert(object_name, df, **params)
            elif operation == "update":
                job_result = sf.update(df, **params)
            elif operation == "upsert":
                job_result = sf.upsert(object_name, df, external_id_field_name, **params)
            elif operation == "delete":
                job_result = sf.delete(df, **params)
            elif operation == "undelete":
                job_result = sf.undelete(df, **params)
            num_failed_records = (
                len([item for item in job_result if 'Error' in item.sf_result])
                if settings.MODE == 'simple'
                else int(job_result["status"]["numberRecordsFailed"])
                if job_result is not None and "status" in job_result
                else 0
            )
            if num_failed_records > 0:
                print(f"({sf.instance}) {num_failed_records} records failed.")
            if settings.BYPASS_AUTOMATION is not False:
                sf.remove_bypass_settings()
            if settings.DISABLE_ACCOUNT_DUPLICATE_RULE is True:
                self.toggle_duplicate_rule('Account.', True)
            return job_result

        def get_backup(sf, id_field):
            # Best-effort pre-operation snapshot of the affected records.
            if not (settings.DO_BACKUP and operation in {'upsert', 'update', 'delete'}):
                return None
            try:
                # Referenced by the @backup_ids bind variable in the query below.
                backup_ids = df[id_field]
                mode = 'simple' if object_name != 'Event__c' else 'bulk'
                sf.console.new_line('Running Backup...')
                return sf.select(
                    f"SELECT * FROM {object_name} WHERE {id_field} != NULL AND {id_field} IN @backup_ids",
                    mode=mode, return_type='dataframe')
            except Exception:
                return None  # backup is best-effort; never block the load

        id_field = external_id_field_name if operation == 'upsert' else 'Id'
        results = ObjDict()
        for sf in sessions:
            backup = get_backup(sf, id_field)
            job_thread = threading.new(perform_operation, sf)
            results[sf.instance] = ObjDict({'backup': backup, 'job': job_thread})
        for result in results.values():
            result.job = result.job.result()
            if result.backup is not None:
                # Dict-comprehension preserves order while de-duplicating columns.
                ordered_cols = list({
                    f: f for f in (['Id', id_field] + result.job.sent_fields +
                                   list(result.backup.columns.values))
                })
                result.backup = result.backup[ordered_cols]
        print("\nAll operations complete!")
        return results
    else:
        print("\nTerminated.")