def sfBulkUpdate(sfBulk, namespace, sObject):
    myObject = sObject
    if len(namespace) > 0:
        myObject = namespace.upper() + '__' + sObject
    stageCSV = stageCSVDir + myObject + '_stg.csv'
    print(stageCSV)
    #print (sObject)
    job = sfBulk.create_insert_job(myObject, contentType='CSV', concurrency='Parallel')
    with open(stageCSV) as csvfile:
        reader = csv.DictReader(csvfile)
        #print (reader.fieldnames)
        rows = []
        for row in reader:
            print("row****", dict(row))
            rows.append(dict(row))
    csv_iter = CsvDictsAdapter(iter(rows))
    print("rows****", rows)
    batch = sfBulk.post_batch(job, csv_iter)
    sfBulk.wait_for_batch(job, batch)
    sfBulk.close_job(job)
    print("Done. Data Uploaded.")
def cleanOppoSF2(bulkNew=bulkNew):
    job = bulkNew.create_query_job("Opportunity", contentType='JSON')
    batch = bulkNew.query(job, "select Id, AccountId, LeadId__c, OldSalesforceExtID__c "
                               "from Opportunity where LeadId__c = null and AccountId = null")
    bulkNew.close_job(job)
    while not bulkNew.is_batch_done(batch):
        time.sleep(10)
    data = []
    datafull = []
    for result in bulkNew.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        for row in result:
            # row['OldSalesforceExtID__c'] = row['Id']
            datafull.append(row)
            row.pop('OldSalesforceExtID__c', None)
            row.pop('AccountId', None)
            row.pop('LeadId__c', None)
            row.pop('attributes', None)
            data.append(row)
            print(json.dumps(row, indent=4, sort_keys=True))
    if data:
        job_id = bulkNew.create_delete_job("Opportunity", contentType='CSV')
        content = CsvDictsAdapter(iter(data))
        batch_id = bulkNew.post_batch(job_id, content)
        bulkNew.wait_for_batch(job_id, batch_id, timeout=120)
        results = bulkNew.get_batch_results(batch_id)
        for i in range(len(data)):
            datafull[i]['resultId-success-created-error'] = results[i]
        print(json.dumps(datafull, indent=4, sort_keys=True))
        bulkNew.close_job(job_id)
        print("Clean opportunity done.")
    else:
        print("No opportunity to clean")
    writeTxt(logPath + 'CleanOppo12.txt', datafull)
def bulkUpdate(sObject):
    sfBulk = SalesforceBulk(username=username, password=password, security_token=security_token)
    job = sfBulk.create_insert_job(sObject, contentType='CSV', concurrency='Parallel')
    dir = "c:/kenandy/python/stageCSV/"
    stageCSV = dir + sObject + '.csv'
    print(stageCSV)
    with open(stageCSV) as csvfile:
        reader = csv.DictReader(csvfile)
        #print (reader.fieldnames)
        rows = []
        for row in reader:
            print("row****", dict(row))
            #print(row['Id'], row['Name'])
            rows.append(dict(row))
        #print("rows****", rows)
    csv_iter = CsvDictsAdapter(iter(rows))
    #print("csv_iter**** ", csv_iter)
    print("rows****", rows)
    batch = sfBulk.post_batch(job, csv_iter)
    sfBulk.wait_for_batch(job, batch)
    sfBulk.close_job(job)
    print("Done. Data Uploaded.")
def tearDown(self):
    if hasattr(self, 'bulk'):
        job_id = self.bulk.create_query_job("Contact")
        self.jobs.append(job_id)
        batch_id = self.bulk.query(
            job_id,
            "SELECT Id FROM Contact WHERE FirstName LIKE 'BulkTestFirst%'")
        self.bulk.wait_for_batch(job_id, batch_id)
        self.bulk.close_job(job_id)
        results = self.bulk.get_all_results_for_query_batch(batch_id)
        results = (record
                   for result in results
                   for record in unicodecsv.DictReader(result, encoding='utf-8'))
        job_id = self.bulk.create_delete_job('Contact')
        self.jobs.append(job_id)
        for batch in batches(results):
            content = CsvDictsAdapter(iter(batch))
            self.bulk.post_batch(job_id, content)
        self.bulk.close_job(job_id)
    for job_id in self.jobs:
        print("Closing job: %s" % job_id)
        try:
            self.bulk.close_job(job_id)
        except BulkApiError:
            pass
def del_ids(ids):
    print('creating bulk delete jobs')
    batchsize = 10000
    count = len(ids)
    batches = math.ceil(count / batchsize)
    remaining = count
    for batch in range(batches):
        print('batch %d of %d starting' % (batch + 1, batches))
        batchsize = min(batchsize, remaining)
        batchstart = batch * batchsize
        job = bulk.create_delete_job(object_type, contentType='CSV')
        ids_dict = [
            dict(Id=idx) for idx in ids[batchstart:batchstart + batchsize]
        ]
        print(ids_dict)
        csv_iter = CsvDictsAdapter(iter(ids_dict))
        batch = bulk.post_batch(job, csv_iter)
        bulk.close_job(job)
        print('waiting for batch')
        while not bulk.is_batch_done(batch):
            print('.', end='')
            sys.stdout.flush()
            sleep(0.5)
        for result in bulk.get_batch_results(batch):
            print(result)
def bulk_insert(self, object, data):
    job = self.bulk.create_insert_job(object, contentType='CSV')
    csv_iter = CsvDictsAdapter(iter(data))
    batch = self.bulk.post_bulk_batch(job, csv_iter)
    self.connector_wait(job, batch, 'bulk insert done')
    # Does not work: should return the Ids of the created records.
    # res = self.bulk.get_batch_result_iter(job, batch, parse_csv=False)
    self.bulk.close_job(job)
def zbulk():
    job = bulk.create_insert_job("Account", contentType='CSV')
    accounts = [dict(Name="Account%d" % idx) for idx in range(10)]
    csv_iter = CsvDictsAdapter(iter(accounts))
    batch = bulk.post_batch(job, csv_iter)
    bulk.wait_for_batch(job, batch)
    bulk.close_job(job)
    print("Done. Accounts uploaded.")
def bulk_upsert(self, object, external_id_name, data):
    job = self.bulk.create_upsert_job(object_name=object, external_id_name=external_id_name)
    csv_iter = CsvDictsAdapter(iter(data))
    batch = self.bulk.post_bulk_batch(job, csv_iter)
    self.connector_wait(job, batch, 'upserting done')
    self.bulk.close_job(job)
    rows = []
    for row in self.get_batch_result_iter(job, batch, parse_csv=False):
        rows.append(row)
    return rows
def action(operation):
    """Performs the Insertion, Deletion, or Update in the Salesforce org"""
    global object_name
    object_name = select(entity, 0)
    impacted_records = []
    for index in range(len(df)):
        record = {}
        for col in df.columns:
            record[col] = df[col][index]
        impacted_records.append(record)
    try:
        MsgBox = messagebox.askquestion("Operation", (
            'You are about to {action} {length} records within the {obj} object'
            ' within your Salesforce org. Are you sure you want to proceed?'
        ).format(action=operation.lower(),
                 length=str(len(impacted_records)),
                 obj=object_name), icon='warning')
        if (MsgBox == 'yes'):
            bulk = SalesforceBulk(
                username=USERNAME,
                password=PASSWORD,
                security_token=TOKEN
            )
            if (operation == "Delete"):
                job = bulk.create_delete_job(object_name, contentType='CSV')
            elif (operation == "Insert"):
                job = bulk.create_insert_job(object_name, contentType='CSV')
            else:
                job = bulk.create_update_job(object_name, contentType='CSV')
            csv_iter = CsvDictsAdapter(iter(impacted_records))
            batch = bulk.post_batch(job, csv_iter)
            bulk.wait_for_batch(job, batch)
            bulk.close_job(job)
            result_df = pd.DataFrame(impacted_records)
            results = bulk.get_batch_results(bulk.get_batch_list(job)[0]['id'])
            result_df['ID'] = ""
            result_df['SUCCESS'] = ""
            result_df['ERROR'] = ""
            for index in range(len(result_df)):
                result_df['ID'][index] = str(results[index]).split("'")[1]
                result_df['SUCCESS'][index] = str(results[index]).split("'")[3]
                result_df['ERROR'][index] = str(results[index]).split("'")[7]
            input_file = (folder_path + "/results" +
                          bulk.get_batch_list(job)[0]['id'] + ".xlsx")
            result_df.to_excel(input_file)
            messagebox.showinfo("Info", (
                'Job Details:\n\nNumber of Records Processed: {recordsProcessed}\n'
                'Number of Records Failed: {recordsFailed}').format(
                    recordsProcessed=bulk.job_status(job)['numberRecordsProcessed'],
                    recordsFailed=bulk.job_status(job)['numberRecordsFailed']))
    except Exception as e:
        messagebox.showerror("Error", e)
def bulk_update(self, object, data):
    job = self.bulk.create_update_job(object, contentType='CSV')
    csv_iter = CsvDictsAdapter(iter(data))
    batch = self.bulk.post_bulk_batch(job, csv_iter)
    self.connector_wait(job, batch, 'bulk update done')
    # Does not work: should return the Ids of the affected records.
    self.bulk.close_job(job)
    rows = []
    for row in self.get_batch_result_iter(job, batch, parse_csv=False):
        rows.append(row)
    return rows
def sfUpdate(sfObject, updateList, sfBulk):
    if updateList:
        job_id = sfBulk.create_update_job(sfObject, contentType='CSV', concurrency='Serial')
        content = CsvDictsAdapter(iter(updateList))
        batch_id = sfBulk.post_batch(job_id, content)
        sfBulk.wait_for_batch(job_id, batch_id, timeout=120)
        results = sfBulk.get_batch_results(batch_id)
        for i in range(len(updateList)):
            updateList[i]['resultId-success-created-error'] = results[i]
        print(json.dumps(updateList, indent=4, sort_keys=True))
        sfBulk.close_job(job_id)
        print(sfObject + " update done.")
    else:
        print("No " + sfObject + " to update")
def deleteBysOBject(sObject):
    #job = sfBulk.create_delete_job("Account", contentType='CSV')
    job = sfBulk.create_delete_job(sObject, contentType='CSV')
    del_list = [dict(Id="a0q1I000000fXEH"), dict(Id="a0q1I000000fXEG")]
    #accounts = [dict(Name="Account%d" % idx) for idx in range(5)]
    csv_iter = CsvDictsAdapter(iter(del_list))
    batch = sfBulk.post_batch(job, csv_iter)
    sfBulk.wait_for_batch(job, batch)
    sfBulk.close_job(job)
    while not sfBulk.is_batch_done(batch):
        print('processing')
        sleep(1)
def create_and_run_delete_job(self, object_name, data):
    job = self.bulk.create_delete_job(object_name, contentType='CSV')
    # Transform data from list of dictionaries into iterable CSV Content Type,
    # since the salesforce_bulk package provides a sweet class for it.
    csv_iter = CsvDictsAdapter(iter(data))
    # Create a batch with the data and add it to the job.
    batch = self.bulk.post_batch(job, csv_iter)
    # Wait for the batch to complete. Default timeout is 10 minutes.
    self.bulk.wait_for_batch(job, batch, timeout=60 * 10)
    # Once the batch has been completed, get the results.
    results = self.bulk.get_batch_results(batch)
    # Close the Job.
    self.bulk.close_job(job)
    self.log.info(
        f'Delete {object_name} job has been successfully completed.')
    return results
def _upload_batches(self, mapping, batches):
    job_id = None
    table = self.tables[mapping.get('table')]
    for batch, batch_rows in batches:
        if not job_id:
            # Create a job only once we have the first batch to load into it
            job_id = self._create_job(mapping)

        # Prepare the rows
        rows = CsvDictsAdapter(iter(batch))

        # Create the batch
        batch_id = self.bulk.post_bulk_batch(job_id, rows)
        self.logger.info(' Uploaded batch {}'.format(batch_id))

        while not self.bulk.is_batch_done(job_id, batch_id):
            self.logger.info(' Checking batch status...')
            time.sleep(10)

        # Wait for batch to complete
        res = self.bulk.wait_for_batch(job_id, batch_id)
        self.logger.info(' Batch {} complete'.format(batch_id))

        # salesforce_bulk is broken in fetching id results so do it manually
        results_url = '{}/job/{}/batch/{}/result'.format(
            self.bulk.endpoint, job_id, batch_id)
        headers = self.bulk.headers()
        resp = requests.get(results_url, headers=headers)
        csv_file = tempfile.TemporaryFile()
        csv_file.write(resp.content)
        csv_file.seek(0)
        reader = csv.DictReader(csv_file)

        # Write to the local Id column on the uploaded rows
        i = 0
        for result in reader:
            row = batch_rows[i]
            i += 1
            if result['Id']:
                setattr(row, mapping['fields']['Id'], result['Id'])

        # Commit to the db
        self.session.commit()

    self.bulk.close_job(job_id)
def insert_recommendations_in_db(product_id_vs_recommended_ids):
    if len(product_id_vs_recommended_ids) == 0:
        return
    job = bulk.create_insert_job(RECOMMENDATION_OBJECT, contentType='CSV', concurrency='Parallel')
    recommendations = []
    for product_id in product_id_vs_recommended_ids:
        recommended_id_vs_scores = product_id_vs_recommended_ids[product_id]
        for recommended_id, score in recommended_id_vs_scores.items():
            recommendations.append(get_recommendation_record(product_id, recommended_id, score))
    if not recommendations:
        return
    csv_iter_recommendations = CsvDictsAdapter(iter(recommendations))
    batch_insert_recommendations = bulk.post_batch(job, csv_iter_recommendations)
    bulk.wait_for_batch(job, batch_insert_recommendations)
    bulk.close_job(job)
    logging.info("Done. Recommendations uploaded.")
def delete_recommendations_in_db(product_ids):
    """delete all recommendation object records for product_ids"""
    if not product_ids:
        return
    where_clause = '{0} IN ({1})'.format(
        PRODUCT_ID_FIELD, ', '.join("'{0}'".format(w) for w in product_ids))
    recommendation_object_ids = query_from_db('Id', object_name=RECOMMENDATION_OBJECT,
                                              where_clause=where_clause)
    ids = [{'Id': row['Id']} for row in recommendation_object_ids]
    if not ids:
        logging.info("Done. No Recommendations to delete.")
        return
    job = bulk.create_delete_job(RECOMMENDATION_OBJECT, contentType='CSV', concurrency='Parallel')
    csv_iter_recommendations = CsvDictsAdapter(iter(ids))
    batch_delete_recommendations = bulk.post_batch(job, csv_iter_recommendations)
    bulk.wait_for_batch(job, batch_delete_recommendations)
    bulk.close_job(job)
    logging.info("Done. Recommendations deleted.")
def stg_sfBulkUpdate(sfBulk, sObject, stg_sObjectFile):
    myFuncName = sys._getframe().f_code.co_name
    print("\n")
    print("[STG-04] ****** %s *** processing (%s) ************" % (myFuncName, sObject))
    if IsCSVEmpty(stg_sObjectFile) == True:
        pass
    else:
        job = sfBulk.create_insert_job(sObject, contentType='CSV', concurrency='Parallel')
        with open(stg_sObjectFile) as csvfile:
            reader = csv.DictReader(csvfile)
            #print(' [No of rows import]:= ', len(list(reader)), end="")
            #print (reader.fieldnames)
            rows = []
            for row in reader:
                print("row****", dict(row))
                rows.append(dict(row))
        csv_iter = CsvDictsAdapter(iter(rows))
        print("rows****", rows)
        batch = sfBulk.post_batch(job, csv_iter)
        sfBulk.wait_for_batch(job, batch)
        sfBulk.close_job(job)
        #while not sfBulk.is_batch_done(batch):
        #    sleep(10)
        """
        for result in sfBulk.get_all_results_for_query_batch(batch):
            reader = unicodecsv.DictReader(result, encoding='utf-8')
            for row in reader:
                print (row)  # dictionary rows
        """
        print("Done. Data Uploaded.")
    'paymentreference': 'cpm__Payment_Reference__c',
    'amount': 'cpm__Amount__c',
    'owner': 'OwnerId',
    'status': 'Status__c',
    'productgroup': 'Product_Group__c',
    'eligibeforgiftaid': 'Eligible_for_Gift_Aid__c',
    'giftaidclaimed': 'Gift_Aid_Claimed__c',
    'transactiontype': 'Transaction_Type__c',
    'datasource': 'Data_Source__c'
}

# Read the input file in batches and upload to SalesForce
counter = 0
for gm_chunk in pd.read_csv(input_file, chunksize=200):
    if counter < 28:
        counter = counter + 1
        continue
    else:
        input_slice = gm_chunk[list(
            sf_mapping.keys())].rename(columns=sf_mapping)
        input_processed = input_slice.where((pd.notnull(input_slice)), None)
        job = bulk.create_insert_job(target_obj, contentType='CSV')
        records = input_processed.to_dict('records')
        csv_iter = CsvDictsAdapter(iter(records))
        batch = bulk.post_batch(job, csv_iter)
        bulk.wait_for_batch(job, batch)
        bulk.close_job(job)
        print("Done. Payment uploaded 200.")
        break
from salesforce_bulk import CsvDictsAdapter
from salesforce_bulk import SalesforceBulk

# Assumes `bulk` is an authenticated SalesforceBulk instance created beforehand.
job = bulk.create_insert_job("Account", contentType='CSV')

accounts = [dict(Name="Account%d" % idx) for idx in range(5)]
csv_iter = CsvDictsAdapter(iter(accounts))
batch = bulk.post_batch(job, csv_iter)
bulk.wait_for_batch(job, batch)
bulk.close_job(job)
print("Done. Accounts uploaded.")
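# A minimal setup sketch (not part of the snippet above), showing one way the `bulk` client used
# above could be created with the same username/password/security-token constructor seen in the
# other snippets in this collection; the credential values are placeholders.
bulk = SalesforceBulk(username='user@example.com', password='your-password',
                      security_token='your-security-token')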
def create_and_run_bulk_job(self, job_type, object_name, primary_key, data):
    """
    Note: If the specified job_type is Update or Upsert, you must provide the
    kwarg "primary_key" for Salesforce to identify records with.

    :param job_type: "Update", "Insert", or "Upsert".
    :type job_type: str
    :param object_name: The Name of the SF Object you are performing a bulk job on.
        Ex. object_name = "Account"
    :type object_name: str
    :param primary_key: The field used to match records: the external ID field for
        an Upsert, or a unique field used to look up record Ids for an Update.
    :type primary_key: str
    :param data: Needs to be formatted as a list of dictionaries.
        Ex. data = [{'Id': 1, 'Name': 'Andrew'}, {'Id': 2, 'Name': 'Jerry'}]
    :type data: list
    """
    # Ensure string arguments have their first letter capitalized.
    job_type = str.title(job_type)
    object_name = str.title(object_name)

    # Connect and authenticate to Salesforce.
    # Create Job.
    self.log.info(f'Creating {object_name} {job_type} job.')
    if job_type not in ['Insert', 'Update', 'Upsert']:
        raise ReferenceError(
            'Invalid job_type specified. Please use "Insert", "Update", or "Upsert".'
        )
    try:
        if job_type == 'Insert':
            job = self.bulk.create_insert_job(object_name, contentType='CSV')
        elif job_type == 'Update':
            job = self.bulk.create_update_job(object_name, contentType='CSV')
            soql_query = f'select Id, {primary_key} from {object_name}'
            query_job = self.bulk.create_query_job(object_name, contentType='CSV')
            query_batch = self.bulk.query(query_job, soql_query)
            self.bulk.close_job(query_job)
            self.bulk.wait_for_batch(query_job, query_batch, timeout=60 * 10)
            query_results = list(
                self.bulk.get_all_results_for_query_batch(query_batch))
            if len(query_results) == 1:
                id_map = json.load(IteratorBytesIO(query_results[0]))
                for rec in data:
                    for row in id_map:
                        if primary_key not in row:
                            key_split = primary_key.split('.')
                            row[primary_key] = row[key_split[0]][key_split[1]]
                        if rec[primary_key] == row[primary_key]:
                            rec['Id'] = row['Id']
                            break
            else:
                raise OverflowError(
                    'Query Results larger than expected. Please review.')
        elif job_type == 'Upsert':
            job = self.bulk.create_upsert_job(object_name,
                                              external_id_name=primary_key,
                                              contentType='CSV')
    except Exception as job_creation_error:
        self.log.info(
            f'Unable to create {object_name} {job_type} Job. Please verify the value of the object_name variable.'
        )
        self.log.exception(
            f'Encountered exception when creating job: {job_creation_error}'
        )
        raise

    # Transform data from list of dictionaries into iterable CSV Content Type,
    # since the salesforce_bulk package provides a sweet class for it.
    csv_iter = CsvDictsAdapter(iter(data))
    # Create a batch with the data and add it to the job.
    batch = self.bulk.post_batch(job, csv_iter)
    # Wait for the batch to complete. Default timeout is 10 minutes.
    self.bulk.wait_for_batch(job, batch, timeout=60 * 10)
    # Once the batch has been completed, get the results.
    results = self.bulk.get_batch_results(batch)
    # Close the Job.
    self.bulk.close_job(job)
    self.log.info(
        f'{job_type}, {object_name}, job has been successfully completed.')
    return results
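# Hypothetical usage sketch (not from the original source): it assumes the method above is defined on
# a wrapper class, here called SalesforceClient, whose __init__ sets self.bulk to an authenticated
# SalesforceBulk instance and self.log to a logging.Logger; the field names and data are placeholders.
client = SalesforceClient(username='user@example.com', password='***', security_token='***')
results = client.create_and_run_bulk_job(
    job_type='Upsert',
    object_name='Contact',
    primary_key='External_Id__c',   # hypothetical external ID field used to match existing Contacts
    data=[{'External_Id__c': 'A-001', 'LastName': 'Smith'},
          {'External_Id__c': 'A-002', 'LastName': 'Jones'}],
)
for row in results:
    print(row)  # each batch result reports the record Id, success flag, and any error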
def generate_content(self, data):
    return CsvDictsAdapter(iter(data))
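# Hypothetical usage sketch (not from the original source), assuming `conn` is an instance of the
# class that defines generate_content above and exposes an authenticated SalesforceBulk client as
# conn.bulk; generate_content just wraps a list of dicts in a CsvDictsAdapter for use as a batch body.
job = conn.bulk.create_insert_job('Account', contentType='CSV')
content = conn.generate_content([{'Name': 'Acme'}, {'Name': 'Globex'}])
batch = conn.bulk.post_batch(job, content)
conn.bulk.wait_for_batch(job, batch)
conn.bulk.close_job(job)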
    disbursals.append(row)

total_size = np.asarray(disbursals)
if len(disbursals) < 10:
    batch_size = len(disbursals)
else:
    batch_size = 10
batches = np.array_split(total_size, batch_size)
for b in batches:
    if job_type == 'update':
        job = bulk.create_update_job('{}'.format(sf_object), contentType='CSV', concurrency='Parallel')
    elif job_type == 'insert':
        job = bulk.create_insert_job('{}'.format(sf_object), contentType='CSV', concurrency='Parallel')
    else:
        print('Invalid job type specified, please check your syntax.')
        exit()
    csv_iter = CsvDictsAdapter(iter(b))
    batch = bulk.post_batch(job, csv_iter)
    bulk.wait_for_batch(job, batch)
    bulk.close_job(job)
    results = bulk.get_batch_results(batch, job)
    for r in results:
        print(r)

print('{} performed for {} total {} records.'.format(job_type, len(disbursals), sf_object))

## UnitTest to validate data uploaded. Note, multiple uploads on the same object in one day can
## trigger a false positive for a failed test (will print negative variance):
field_list = 'Id'
field_query = field_list.strip("'")
soql_data = sf.query_all("SELECT {} from {} Where LastModifiedById = '{}' and SystemModStamp = TODAY".format(
    field_query, sf_object, user_id))
df = pd.DataFrame(soql_data['records']).drop(columns='attributes')