# Imports assumed by this example. ObjDict, prompt, threading (a small project
# thread helper, not the standard-library module) and the Salesforce session
# objects come from the surrounding project.
import csv
from os import listdir, path
from os.path import isdir

import pandas as pd


def main(sessions, do_fix=False):
    # Silence message printing on every session; the first session is used for
    # all metadata lookups below.
    for s in sessions:
        s.print_messages = False
    session = sessions[0]

    files_list = [
        item for item in listdir("./resources/sf_update_files")
        if not isdir("./resources/sf_update_files/" + item)
        and item != ".DS_Store" and not item.startswith("~$")
    ]
    # Most recently modified files first
    files_list = sorted(files_list,
                        key=lambda item: path.getmtime(
                            "./resources/sf_update_files/" + item),
                        reverse=True)

    selected_file = prompt('\nThe following update files are available:',
                           files_list)

    rows = []
    # object_name = lastSettings["objectName"] if file_selection_input == "0" else None
    data_sheet_name = None

    # Settings defaults
    settings = ObjDict({
        "BATCH_SIZE": 2000,
        "OPERATION": None,
        "DO_UPSERT": None,  # Should be deprecated; use OPERATION instead
        "BYPASS_AUTOMATION": None,
        "EXT_ID": None,
        "PARALLEL_CONCURRENCY": True,
        "BULK_API_MODE": True
    })

    # Get data and settings from file
    if selected_file.endswith(".csv"):
        with open("./resources/sf_update_files/" + selected_file,
                  'r',
                  encoding='utf-8-sig') as file:
            reader = csv.DictReader(file)
            for row in reader:
                rows.append(row)
    elif selected_file.endswith(".xlsx"):
        file_path = './resources/sf_update_files/{}'.format(selected_file)
        xlsx_file = pd.ExcelFile(file_path)
        sheets = xlsx_file.sheet_names
        data_sheet_name = sheets[0]
        datadf = pd.read_excel(xlsx_file, data_sheet_name)
        settingsdf = pd.read_excel(
            xlsx_file, 'Settings') if 'Settings' in sheets else None

        # Fill nulls with blank strings
        datadf = datadf.fillna(value='')
        # Set column names to strings
        datadf.columns = datadf.columns.astype(str)
        # Set timestamp columns to string
        for col in datadf.select_dtypes(include=['datetime64']).columns.values:
            datadf[col] = datadf[col].astype(str)
        # Set numeric columns to zero-trimmed strings (e.g. 3.0 -> "3");
        # only a trailing ".0" is stripped so values like "10.05" are untouched
        for col in datadf.select_dtypes(
                include=['int64', 'float64']).columns.values:
            datadf[col] = datadf[col].astype(float).astype(str)
            datadf[col] = datadf[col].str.replace(r'\.0$', '', regex=True)

        rows = datadf.to_dict('records')
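
        # A "Settings" sheet, when present, is expected to have "Field" and
        # "Value" columns; each row overrides one of the defaults above
        # (e.g. Field = OPERATION, Value = UPSERT).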

        if settingsdf is not None:
            inputsettings = settingsdf.set_index('Field').to_dict('index')
            inputsettings = {
                key: val['Value']
                for key, val in inputsettings.items()
            }
            settings.update(inputsettings)

    else:
        raise Exception("Can only parse files with a .csv or .xlsx extension")

    # Handle for deprecated DO_UPSERT setting
    if settings.DO_UPSERT is True and settings.OPERATION is None:
        settings.OPERATION = 'UPSERT'

    operation = str(
        settings.OPERATION).lower() if settings.OPERATION is not None else None

    # Try to detect Object name from record Ids in file
    # If no record Ids are present, try to use the name of the tab in the file we are loading
    # If no match is found, prompt the user for the object name
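    # (get_object_name presumably resolves the object from a record Id via its
    # three-character key prefix, e.g. "001" for Account.)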
    rows_with_id = [r for r in rows if "Id" in r and r["Id"] != ""]
    source_field_names = {key for key in rows[0].keys()}
    if len(rows_with_id) > 0:
        object_name = session.get_object_name(rows_with_id[0]["Id"])
    else:
        all_object_names = [
            item["name"] for item in session.get_org_description()["sobjects"]
        ]
        object_names_in_file_name = [
            item for item in all_object_names
            if " " + item + " " in selected_file
        ]
        if data_sheet_name in all_object_names:
            object_name = data_sheet_name
        elif len(object_names_in_file_name) == 1:
            object_name = object_names_in_file_name[0]
        else:
            object_name = prompt(
                "\nWhat object are the records in this file for?")

    # lastSettings["objectName"] = object_name
    # with open(settingsLoc, 'w') as outfile:
    #     json.dump(lastSettings, outfile)

    object_desc = session.get_object_description(object_name)

    # Defaults so these names exist even when the upsert-detection block below
    # is skipped (e.g. when OPERATION is preset to insert or update).
    upsert_matches = []
    reference_upsert_matches = []
    self_external_id = None

    if operation is None or operation == 'upsert':
        source_fields_relationship_names = {
            f[0:f.find('.')]
            for f in source_field_names if '.' in f
        }
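        # Columns named "Relationship.Field" (e.g. a hypothetical
        # "Account.External_Key__c") indicate an External ID lookup on a related
        # object and are matched against the object's relationship names below.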

        upsert_matches = [{
            "field": item["name"]
        } for item in object_desc.fields if item["externalId"] == True
                          and item["name"] in source_field_names]

        possible_reference_upsert_matches = [
            {
                "referenceTo":
                item.referenceTo[0],
                "match_string":
                item.relationshipName + ".",
                "reference_object_descs": [
                    threading.new(session.get_object_description, r)
                    for r in item.referenceTo
                ]
            } for item in object_desc.fields if len(item.referenceTo) > 0
            and item.relationshipName in source_fields_relationship_names
        ]
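        # "reference_object_descs" prefetches the describes of the possible
        # lookup targets on background threads via the project's threading
        # helper; the loop below then calls get_object_description directly,
        # presumably served from a cache.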

        reference_upsert_matches = []

        for field in source_field_names:
            for match in possible_reference_upsert_matches:
                if field.startswith(match["match_string"]):
                    upsert_match_object_desc = session.get_object_description(
                        match["referenceTo"])
                    reference_upsert_matches.extend(
                        [{
                            "field": field,
                            "matching_object": match["referenceTo"],
                            "matching_field": item["name"]
                        } for item in upsert_match_object_desc["fields"]
                         if item["externalId"] == True
                         and item["name"] == field[field.find(".") + 1:]])

        if len(upsert_matches) > 0:
            print(
                "\nFound the following External ID references for this object: {}"
                .format(", ".join([item["field"] for item in upsert_matches])))
        if len(reference_upsert_matches) > 0:
            print(
                "Found the following External ID references for a lookup object: {}"
                .format(", ".join(
                    [item["field"] for item in reference_upsert_matches])))

        if len(upsert_matches) + len(
                reference_upsert_matches) > 0 and settings.OPERATION is None:
            if prompt("\nWould you like to upsert?", boolean=True):
                operation = 'upsert'

    if operation is None:
        operation_options = {}
        if len(rows_with_id) == 0:          # Existing record Ids in the file rule out insert
            operation_options['Insert'] = 'insert'
        if len(rows_with_id) == len(rows):  # Update requires an Id on every row
            operation_options['Update'] = 'update'
        operation = prompt("\nWhat operation would you like to perform?",
                           options=operation_options)

    # lastSettings["doUpsert"] = do_upsert
    # with open(settingsLoc, 'w') as outfile:
    #     json.dump(lastSettings, outfile)

    if operation == 'upsert':
        upsert_matches.insert(0, {"field": "Id"})
        self_external_id = settings.EXT_ID

        if self_external_id is None:
            if len(upsert_matches) == 1:
                self_external_id = upsert_matches[0]["field"]
            else:
                self_external_id = prompt(
                    "\nWhat ID field would you like to use for upsert?",
                    options=[item['field'] for item in upsert_matches])
                # print("\nWhat ID field would you like to use for upsert?")
                # counter = 1
                # print_str = ""
                # for item in upsert_matches:
                #     print_str += "{}) {} \n".format(counter, item["field"])
                #     counter += 1
                # print(print_str)
                # self_external_id = upsert_matches[int(input())-1]["field"]

        if len([
                item for item in upsert_matches
                if item["field"] == self_external_id
        ]) == 0:
            raise Exception(
                "External ID field '{}' does not appear in the selected file."
                .format(self_external_id))

    fields_to_update = [
        item["name"] for item in object_desc["fields"]
        if item["name"] in rows[0] and item["updateable"] == True
        and item["calculated"] == False and item["autoNumber"] == False
    ]
    fields_to_update.extend(
        [item["field"] for item in reference_upsert_matches])
    fields_to_ignore = [
        item for item in rows[0] if item not in fields_to_update
    ]

    rows_to_update = [{
        f: v
        for (f, v) in r.items() if f == "Id" or f in fields_to_update
        or f in [mat["field"] for mat in upsert_matches]
    } for r in rows]

    mode = 'bulk' if settings.BULK_API_MODE == True else 'simple'
    if settings.BYPASS_AUTOMATION is None:
        settings.BYPASS_AUTOMATION = prompt(
            f"\nDo you need to bypass automation for this {operation}?",
            boolean=True)

    print("Selected file:     {}".format(selected_file))
    print("Operation:         {}".format(operation.title()) +
          (" (on {})".format(self_external_id
                             ) if self_external_id is not None else ""))
    print("Table:             {}".format(object_name))
    print("Bypass automation: {}".format(settings.BYPASS_AUTOMATION))
    print("Fields to update:  {}".format(", ".join(fields_to_update)))
    print("Fields to ignore:  {}".format(", ".join(fields_to_ignore)))
    do_operation_confirmation = prompt(
        f"\nWill {operation} {len(rows)} records. Are you sure?", boolean=True)

    # Now that all settings have been determined, perform the insert/update/delete in ALL sessions that were passed into the process
    # It is assumed that the system metadata that was queried for the 1st session is the same in the other sessions
    concurrency = "Parallel" if settings.PARALLEL_CONCURRENCY else "Serial"
    if do_operation_confirmation:

        def perform_crud_operation(session):
            if settings.BYPASS_AUTOMATION:
                session.add_bypass_settings()
            else:
                session.remove_bypass_settings()

            if operation == "insert":
                job_result = session.insert_records(object_name,
                                                    rows_to_update,
                                                    concurrency=concurrency)
            elif operation == "update":
                job_result = session.update_records(rows_to_update,
                                                    concurrency=concurrency)
            elif operation == "upsert":
                job_result = session.upsert_records(object_name,
                                                    rows_to_update,
                                                    self_external_id,
                                                    concurrency=concurrency,
                                                    mode=mode)
            else:
                job_result = None

            if job_result is not None and "status" in job_result and job_result[
                    "status"]["numberRecordsFailed"] != "0":
                print("{} records failed.".format(
                    job_result["status"]["numberRecordsFailed"]))
                # results = session.get_job_results(job_result)
                # session.write_file("./resources/sf_update_files/error_logs/error_{}".format(selected_file.replace(".xlsx", ".csv")), results)

            if settings.BYPASS_AUTOMATION:
                session.remove_bypass_settings()

        for session in sessions:
            threading.new(perform_crud_operation, session)
        threading.wait()
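        # (threading.new and threading.wait appear to be project helpers: new
        # schedules the call on a worker thread, wait blocks until all scheduled
        # calls have finished.)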

        print("\nOperation complete!")
    else:
        print("\nTerminated.")

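
# A minimal usage sketch (not part of the original example): it assumes the
# caller builds Salesforce_API sessions from stored credentials, as the second
# example below does; "my_credentials" is a hypothetical placeholder.
#
#     sessions = [Salesforce_API(creds) for creds in my_credentials]
#     main(sessions)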
Example 2
    def dataload_file(self, file_path):
        object_name = None
        external_id_field_name = None
        # Settings defaults
        settings = ObjDict({
            "BATCH_SIZE": 2000,
            "OPERATION": None,
            "BYPASS_AUTOMATION": None,
            "CONCURRENCY": 'Parallel',
            "MODE": 'simple',
            "DO_BACKUP": True,
            "DISABLE_ACCOUNT_DUPLICATE_RULE": False
        })
        if file_path.endswith('.csv'):
            df = pd.read_csv(file_path)
            data_sheet_name = None
        elif file_path.endswith('.xlsx'):
            xlsx = pd.ExcelFile(file_path)
            all_sheets = xlsx.sheet_names
            option_sheets = [s for s in all_sheets if s != 'Settings']
            data_sheet_name = option_sheets[0] if len(option_sheets) == 1 else prompt('Which sheet should be loaded?', options=option_sheets)
            df = pd.read_excel(xlsx, data_sheet_name)

            df.columns = df.columns.astype(str)

            if 'Settings' in all_sheets:
                df_settings = pd.read_excel(xlsx, 'Settings')
                inputsettings = {
                    key:val['Value']
                    for key,val in df_settings.set_index('Field').to_dict('index').items()
                }
                settings.update(inputsettings)
                assert type(settings.BATCH_SIZE) is int
                assert settings.OPERATION is None or str(settings.OPERATION).lower() in ('insert','update','upsert','delete','undelete')
                assert settings.BYPASS_AUTOMATION in (None,True,False)
                assert settings.CONCURRENCY in ('Parallel','Serial')
                assert settings.MODE in ('simple','bulk')
        else:
            raise Exception('Can only parse files with a .csv or .xlsx extension')

        instances = prompt('For which instance(s) should this file be loaded?', multiselect=True, options={
            (key[key.rindex('.')+1:] + (' prod' if val['sandbox'] == 'False' else '')).strip(): val 
            for key, val in self.sf.instance_credentials.items() if 'security_token' in val
        })
        sessions = [Salesforce_API(c) for c in instances]

        # Try to detect Object based on records in the file
        # If no record Ids are present, try to use the name of the tab in the file
        if 'Id' in df:
            id_list = df[['Id']].fillna('').query("Id != ''")['Id'].to_list()
            if len(id_list) > 0:
                try:
                    object_name = sessions[0].get_object_name(id_list)
                except: pass
        if object_name is None:
            all_object_names = {item.name for item in sessions[0].get_org_description().sobjects}
            if data_sheet_name is not None and data_sheet_name in all_object_names:
                object_name = data_sheet_name
            if object_name is None:
                object_name = prompt("\nWhat object are the records in this file for?", expected=all_object_names)
        if 'RecordType.Name' in df and 'RecordTypeId' not in df:
            df['RecordTypeId'] = df['RecordType.Name'].apply(lambda x: self.get_recordtype_map()[(object_name, x)].Id if (object_name, x) in self.get_recordtype_map() else None)
        object_desc = threading.new(sessions[0].get_object_description, object_name)
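        # threading.new returns a future-like handle; object_desc.result() below
        # blocks for the describe result, so the call runs in the background
        # while the user answers the prompts that follow.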

        operation = str(settings.OPERATION).lower() if settings.OPERATION is not None else None
        if operation is None:
            all_records_have_id = 'Id' in df and len(df) == len(df.fillna('').query("Id != ''"))
            operation_options = [
                'insert',
                'upsert',
                'update' if all_records_have_id else None,
                'delete' if all_records_have_id else None,
                'undelete' if all_records_have_id else None,
            ]
            operation = prompt('What operation needs to be performed?', options=[o for o in operation_options if o is not None])

        if operation == 'upsert':
            external_id_fields = [f.name for f in object_desc.result().fields if f.externalId is True and f.name in df.columns.values]
            if len(external_id_fields) == 1:
                external_id_field_name = external_id_fields[0]
            elif len(external_id_fields) == 0:
                external_id_field_name = 'Id'
            else:
                external_id_field_name = prompt('Which field would you like to use as the External ID?', options=external_id_fields)

        if 'RecordTypeId' not in df and operation in ('insert','upsert') and len(object_desc.result().recordTypeInfos) > 1:
            default_record_type = [item.name for item in object_desc.result().recordTypeInfos if item.defaultRecordTypeMapping is True][0]
            if prompt(f'No RecordTypeId was specified. New records will be defaulted to RecordType = "{default_record_type}". Proceed?', boolean=True) is False:
                raise Exception('Terminated - No RecordTypeId specified')

        if settings.DO_BACKUP and len(df) > 5000:
            if not prompt(f"Large record count: {len(df)} records. Do you want to run a backup? (Normally automatic)", boolean=True):
                settings.DO_BACKUP = False

        valid_fields = sessions[0]._get_valid_fields_for_operation(operation, object_name, first_row=df.iloc[0].to_dict(), external_id_field_name=external_id_field_name)
        fields_to_update = [f for f in df.columns.values if f.lower() in valid_fields]
        fields_to_ignore = [f for f in df.columns.values if f.lower() not in valid_fields]

        custom_settings_options = {
            'Yes': True,
            'No': False,
            'Set Data Admin Only': {'dataadmin': True, 'automation': False, 'pbflow': False, 'fc': False, 'multicurrency': False},
            'Custom': self.sf.lndatautil._configure_GeneralSettings__c
        }
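        # The chosen value is applied later in perform_operation: True calls
        # add_bypass_settings(), False calls remove_bypass_settings(), and a dict
        # is expanded into keyword flags for add_bypass_settings(); 'Custom'
        # points at a project helper (call_options=True presumably lets prompt()
        # invoke a callable option).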
        
        settings.BYPASS_AUTOMATION = settings.BYPASS_AUTOMATION if settings.BYPASS_AUTOMATION is not None else prompt(f"\nDo you need to bypass automation for this {operation}?", options=custom_settings_options, call_options=True)
        while True:
            external_id_lookups_text = ', '.join([f for f in fields_to_update if '.' in f])
            operation_message_end_text = f" on {external_id_field_name}" if operation == 'upsert' else ''
            operation_message_end_text += f" and External Id lookup(s) for: {external_id_lookups_text}" if len(external_id_lookups_text) > 0 else ''
            print(f"Selected file:     {file_path[file_path.rindex('/')+1:]}")
            print(f"Operation:         {operation.title()}{operation_message_end_text}")
            print(f"Table:             {object_name}")
            print(f"Bypass Settings:   {settings.BYPASS_AUTOMATION}")
            print(f"Job Settings:      Mode: {settings.MODE}, Batch size: {settings.BATCH_SIZE}, Concurrency: {settings.CONCURRENCY}")
            print(f"Fields to update:  {', '.join(fields_to_update)}")
            print(f"Fields to ignore:  {', '.join(fields_to_ignore)}")
            operation_confirmation = prompt(f"\nWill {operation} {len(df)} records. Are you sure? Press 'M' to modify settings. (Y/N/M)", expected=['Y','N','M'])
            if operation_confirmation == 'M':
                options = {
                    "BYPASS_AUTOMATION": {'options':custom_settings_options, 'call_options': True},
                    "BATCH_SIZE": {'expected_type': int},
                    "CONCURRENCY": {'expected': ['Parallel','Serial','']},
                    "MODE": {'expected': ['simple','bulk','']},
                    "DISABLE_ACCOUNT_DUPLICATE_RULE": {'options': [True, False]},
                }
                while True:
                    modify_options = ['All'] + list(options.keys()) + ['Done']
                    selected_option = prompt('Modify:', modify_options)
                    if selected_option == 'All':
                        for option, val in options.items():
                            new = prompt(f'Set setting {option} (currently {settings[option]}): ', **val)
                            settings[option] = new if new != '' else settings[option]
                        break
                    elif selected_option == 'Done':
                        break
                    else:
                        option, val = selected_option, options[selected_option]
                        new = prompt(f'Set setting {option} (currently {settings[option]}): ', **val)
                        settings[option] = new if new != '' else settings[option]
                continue
            break


        # Now that all settings have been determined, perform the operation in ALL sessions that were passed into the process
        # It is assumed that the metadata queried from the first session is the same in the other sessions
        if operation_confirmation == 'Y':
            def perform_operation(sf):
                params = {key.lower(): val for key,val in settings.items() if key not in ('BYPASS_AUTOMATION',)}
                if settings.BYPASS_AUTOMATION is True:
                    sf.add_bypass_settings()
                elif settings.BYPASS_AUTOMATION is False:
                    sf.remove_bypass_settings()
                else:
                    sf.add_bypass_settings(**settings.BYPASS_AUTOMATION)
                if settings.DISABLE_ACCOUNT_DUPLICATE_RULE is True:
                    self.toggle_duplicate_rule('Account.', False)

                if operation == "insert":
                    job_result = sf.insert(object_name, df, **params)
                elif operation == "update":
                    job_result = sf.update(df, **params)
                elif operation == "upsert":
                    job_result = sf.upsert(object_name, df, external_id_field_name, **params)
                elif operation == "delete":
                    job_result = sf.delete(df, **params)
                elif operation == "undelete":
                    job_result = sf.undelete(df, **params)

                num_failed_records = (
                    len([item for item in job_result if 'Error' in item.sf_result]) if settings.MODE == 'simple'
                    else int(job_result["status"]["numberRecordsFailed"]) if job_result is not None and "status" in job_result
                    else 0
                )
                if num_failed_records > 0:
                    print(f"({sf.instance}) {num_failed_records} records failed.")

                if settings.BYPASS_AUTOMATION is not False:
                    sf.remove_bypass_settings()
                if settings.DISABLE_ACCOUNT_DUPLICATE_RULE is True:
                    self.toggle_duplicate_rule('Account.', True)
                return job_result
            def get_backup(sf, id_field):
                if not (settings.DO_BACKUP and operation in {'upsert','update','delete'}):
                    return None
                try:
                    backup_ids = df[id_field]
                    mode = 'simple' if object_name != 'Event__c' else 'bulk'
                    sf.console.new_line('Running Backup...')
                    return sf.select(f"SELECT * FROM {object_name} WHERE {id_field} != NULL AND {id_field} IN @backup_ids", mode=mode, return_type='dataframe')
                except:
                    return None

            id_field = external_id_field_name if operation == 'upsert' else 'Id'
            results = ObjDict()
            for sf in sessions:
                backup = get_backup(sf, id_field)
                job_thread = threading.new(perform_operation, sf)
                results[sf.instance] = ObjDict({'backup': backup, 'job': job_thread})
            for result in results.values():
                result.job = result.job.result()
                if result.backup is not None:
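                    # The dict comprehension doubles as an insertion-ordered set:
                    # it de-duplicates column names while keeping the Id /
                    # external-ID columns and the sent fields in front.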
                    ordered_cols = list({f:f for f in (['Id', id_field] + result.job.sent_fields + list(result.backup.columns.values))})
                    result.backup = result.backup[ordered_cols]

            print("\nAll operations complete!")
            return results
        else:
            print("\nTerminated.")