def sf12Query(lastHourDateTime, bulkOld=bulkOld):
    intLead = []
    intOppo = []

    batch = buildQueryBatch(bulkOld, sfConstants.interactionSF12, 'interaction__c', lastHourDateTime)
    data = []
    for result in bulkOld.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        for row in result:
            data.append(row)
            print(json.dumps(row, indent=4, sort_keys=True))
    writeTxt(logPath + 'Qinteraction12.txt', data)

    batch = buildQueryBatch(bulkOld, sfConstants.opportunitySF12, 'opportunity', lastHourDateTime, intOppo)
    data = []
    for result in bulkOld.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        for row in result:
            if row['Lead__c'] and row['Lead__c'] not in intLead:
                data.append(row)
                print(json.dumps(row, indent=4, sort_keys=True))
    writeTxt(logPath + 'Qoppo12.txt', data)

    batch = buildQueryBatch(bulkOld, sfConstants.leadSF12, 'lead', lastHourDateTime, intLead)
    while not bulkOld.is_batch_done(batch):
        time.sleep(10)
    data = []
    for result in bulkOld.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        for row in result:
            if (row['MobilePhone'] or row['Phone'] or row['Email']) or row['Id'] in intLead:
                data.append(row)
                print(json.dumps(row, indent=4, sort_keys=True))
    writeTxt(logPath + 'Qlead12.txt', data)
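# buildQueryBatch (used above and in sf21Query below) is not defined in this
# file. A minimal sketch of what it plausibly does, following the
# create_query_job/query/close_job pattern used elsewhere in this codebase;
# the incremental-date filter, the idList handling, and the newToOld flag
# are assumptions, not the original helper:
def buildQueryBatch(bulk, fields, objectName, lastHourDateTime, idList=None, newToOld=False):
    # newToOld presumably switches between SF1->SF2 and SF2->SF1 field sets;
    # it is unused in this sketch.
    job = bulk.create_query_job(objectName, contentType='JSON')
    query = 'select ' + ', '.join(fields) + ' from ' + objectName
    query += ' where LastModifiedDate > ' + lastHourDateTime  # assumed incremental filter
    if idList:
        # assumed: also include records whose Ids were collected by an earlier query
        query += " or Id in ('" + "','".join(idList) + "')"
    batch = bulk.query(job, query)
    bulk.close_job(job)
    return batch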
def test_perform_bulk_api_pass_converts_datetimes(self, bulk_proxy):
    connection = Mock()
    oc = amaxa.ExtractOperation(connection)
    oc.get_field_map = Mock(return_value={
        'CreatedDate': {
            'name': 'CreatedDate',
            'type': 'datetime'
        }
    })

    retval = [{'Id': '001000000000001', 'CreatedDate': 1546659665000}]

    bulk_proxy.is_batch_done = Mock(side_effect=[False, True])
    bulk_proxy.create_query_job = Mock(return_value='075000000000000AAA')
    bulk_proxy.get_all_results_for_query_batch = Mock(return_value=[
        IteratorBytesIO([json.dumps(retval).encode('utf-8')])
    ])

    step = amaxa.ExtractionStep('Account', amaxa.ExtractionScope.QUERY, ['CreatedDate'])
    step.store_result = Mock()
    oc.add_step(step)
    step.initialize()

    step.perform_bulk_api_pass('SELECT Id, CreatedDate FROM Account')
    step.store_result.assert_called_once_with({
        'Id': '001000000000001',
        'CreatedDate': '2019-01-05T03:41:05.000+0000'
    })
def test_perform_bulk_api_pass_stores_results(self, bulk_proxy):
    connection = Mock()
    oc = amaxa.ExtractOperation(connection)
    oc.get_field_map = Mock(return_value={
        'Lookup__c': {
            'name': 'Lookup__c',
            'type': 'reference',
            'referenceTo': ['Account']
        }
    })

    retval = [{'Id': '001000000000001'}, {'Id': '001000000000002'}]

    bulk_proxy.is_batch_done = Mock(side_effect=[False, True])
    bulk_proxy.create_query_job = Mock(return_value='075000000000000AAA')
    bulk_proxy.get_all_results_for_query_batch = Mock(return_value=[
        IteratorBytesIO([json.dumps(retval).encode('utf-8')])
    ])

    step = amaxa.ExtractionStep('Account', amaxa.ExtractionScope.ALL_RECORDS, ['Lookup__c'])
    step.store_result = Mock()
    oc.add_step(step)
    step.initialize()

    step.perform_bulk_api_pass('SELECT Id FROM Account')
    step.store_result.assert_any_call(retval[0])
    step.store_result.assert_any_call(retval[1])
def _store_inserted_ids_for_batch(self, result_file, local_ids, id_table_name, conn):
    # Set up a function to generate rows based on this result file
    def produce_csv():
        """Iterate over job results and prepare rows for id table"""
        reader = unicodecsv.reader(result_file)
        next(reader)  # skip header
        i = 0
        for row, local_id in zip(reader, local_ids):
            if row[1] == "true":  # Success
                sf_id = row[0]
                yield "{},{}\n".format(local_id, sf_id).encode("utf-8")
            else:
                if self.options["ignore_row_errors"]:
                    self.logger.warning("  Error on row {}: {}".format(i, row[3]))
                else:
                    raise BulkDataException("Error on row {}: {}".format(i, row[3]))
            i += 1

    # Bulk insert rows into id table
    columns = ("id", "sf_id")
    data_file = IteratorBytesIO(produce_csv())
    self._sql_bulk_insert_from_csv(conn, id_table_name, columns, data_file)
def get_query_records_dict(self, db_table, soql_query):
    """Execute a bulk Salesforce soql query and return the results as a list of dictionaries.

    :param db_table: Database table name
    :param soql_query: Soql query
    :return: If success, list of result record dictionaries; else empty list
    """
    self.bulk = SalesforceBulk(sessionId=self.session_id, host=self.instance)
    job = self.bulk.create_query_job(db_table, contentType="JSON")
    batch = self.bulk.query(job, soql_query)
    self.bulk.close_job(job)
    while not self.bulk.is_batch_done(batch):
        print("Waiting for batch query to complete")
        sleep(10)
    dict_records = []
    rec_count = 0
    print("Iterating through batch result set")
    for result in self.bulk.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        for row in result:
            rec_count += 1
            dict_records.append(row)
        print("Current fetched record count: ", rec_count)
    return dict_records
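# A minimal usage sketch for get_query_records_dict. The `client` instance is
# an assumption: any wrapper object exposing session_id and instance from a
# prior Salesforce login would work the same way:
client = SalesforceClient(username, password, security_token)  # hypothetical wrapper
records = client.get_query_records_dict(
    "Account",
    "select Id, Name from Account where CreatedDate = LAST_N_DAYS:7",
)
for record in records:
    print(record["Id"], record["Name"])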
def test_bulk_query_converts_datetimes(self):
    sf = Mock()
    sf.bulk_url = "https://salesforce.com"
    conn = Connection(sf)
    conn._bulk = Mock()
    retval = [
        {"Id": "001000000000001", "CreatedDate": 1546659665000},
        {"Id": "001000000000002", "CreatedDate": None},
    ]
    conn._bulk.is_batch_done = Mock(side_effect=[False, True])
    conn._bulk.create_query_job = Mock(return_value="075000000000000AAA")
    conn._bulk.get_all_results_for_query_batch = Mock(
        return_value=[IteratorBytesIO([json.dumps(retval).encode("utf-8")])]
    )

    results = list(
        conn.bulk_api_query(
            "Account", "SELECT Id, CreatedDate FROM Account", ["CreatedDate"], 5
        )
    )

    self.assertEqual(
        results[0],
        {"Id": "001000000000001", "CreatedDate": "2019-01-05T03:41:05.000+0000"},
    )
    self.assertEqual(results[1], {"Id": "001000000000002", "CreatedDate": None})
def query_from_db(*fields, object_name=None, where_clause=None):
    if object_name is None:
        raise ValueError('object_name is not provided.')
    job = bulk.create_query_job(object_name, contentType='JSON')
    query = 'select ' + ', '.join(fields) + ' from ' + object_name
    if where_clause is not None:
        query += ' where ' + where_clause
    logging.info(query)
    batch = bulk.query(job, query)
    bulk.close_job(job)
    try:
        while not bulk.is_batch_done(batch):
            logging.info('batch status: %s', bulk.batch_status(batch_id=batch)['state'])
            time.sleep(10)
    finally:
        if not bulk.is_batch_done(batch):
            bulk.abort_job(job)
            logging.info('aborted job')
    records = []
    for result in bulk.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        for row in result:
            records.append(row)
    return records
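# Example call for query_from_db. It assumes a module-level `bulk`
# SalesforceBulk client, just as the function itself does; the field and
# object names here are illustrative:
contacts = query_from_db(
    'Id', 'FirstName', 'LastName',
    object_name='Contact',
    where_clause='LastModifiedDate = TODAY',
)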
def cleanOppoSF2(bulkNew=bulkNew):
    job = bulkNew.create_query_job("Opportunity", contentType='JSON')
    batch = bulkNew.query(job, "select Id, AccountId, LeadId__c, OldSalesforceExtID__c from Opportunity where LeadId__c = null and AccountId = null")
    bulkNew.close_job(job)
    while not bulkNew.is_batch_done(batch):
        time.sleep(10)
    data = []
    datafull = []
    for result in bulkNew.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        for row in result:
            # row['OldSalesforceExtID__c'] = row['Id']
            datafull.append(row)
            row.pop('OldSalesforceExtID__c', None)
            row.pop('AccountId', None)
            row.pop('LeadId__c', None)
            row.pop('attributes', None)
            data.append(row)
            print(json.dumps(row, indent=4, sort_keys=True))
    if data:
        job_id = bulkNew.create_delete_job("Opportunity", contentType='CSV')
        content = CsvDictsAdapter(iter(data))
        batch_id = bulkNew.post_batch(job_id, content)
        bulkNew.wait_for_batch(job_id, batch_id, timeout=120)
        results = bulkNew.get_batch_results(batch_id)
        for i in range(len(data)):
            datafull[i]['resultId-success-created-error'] = results[i]
        print(json.dumps(datafull, indent=4, sort_keys=True))
        bulkNew.close_job(job_id)
        print("Clean opportunity done.")
    else:
        print("No opportunity to clean")
    writeTxt(logPath + 'CleanOppo12.txt', datafull)
def _import_results(self, mapping, result_file, conn):
    # Map SF field names to local db column names
    sf_header = result_file.readline().strip().decode("utf-8").split(",")
    columns = []
    for sf in sf_header:
        if sf == "Records not found for this query":
            return
        if sf:
            column = mapping["fields"].get(sf)
            if not column:
                column = mapping.get("lookups", {}).get(sf, {}).get("key_field")
            if column:
                columns.append(column)
    if not columns:
        return
    record_type = mapping.get("record_type")
    if record_type:
        columns.append("record_type")
    processor = log_progress(
        process_incoming_rows(result_file, record_type), self.logger
    )
    data_file = IteratorBytesIO(processor)
    self._sql_bulk_insert_from_csv(conn, mapping["table"], columns, data_file)
    self.session.commit()
def test_perform_bulk_api_pass_stores_high_volume_results(self, bulk_proxy):
    connection = Mock()
    oc = amaxa.ExtractOperation(connection)
    oc.get_field_map = Mock(return_value={
        'Lookup__c': {
            'name': 'Lookup__c',
            'type': 'reference',
            'referenceTo': ['Account']
        }
    })

    retval = []
    for i in range(5):
        retval.append([
            {'Id': '00100000{:d}{:06d}'.format(i, j), 'Name': 'Account {:d}{:06d}'.format(i, j)}
            for j in range(100000)
        ])

    bulk_proxy.is_batch_done = Mock(side_effect=[False, True])
    bulk_proxy.create_query_job = Mock(return_value='075000000000000AAA')
    bulk_proxy.get_all_results_for_query_batch = Mock(
        return_value=[IteratorBytesIO([json.dumps(chunk).encode('utf-8')]) for chunk in retval]
    )

    step = amaxa.ExtractionStep('Account', amaxa.ExtractionScope.ALL_RECORDS, ['Lookup__c'])
    step.store_result = Mock()
    oc.add_step(step)
    step.initialize()

    step.perform_bulk_api_pass('SELECT Id FROM Account')
    self.assertEqual(500000, step.store_result.call_count)
def test_bulk_api_query(self):
    # FIXME: test wait
    sf = Mock()
    sf.bulk_url = "https://salesforce.com"
    conn = Connection(sf)
    conn._bulk = Mock()
    retval = [{"Id": "001000000000001"}, {"Id": "001000000000002"}]
    conn._bulk.is_batch_done = Mock(side_effect=[False, True])
    conn._bulk.create_query_job = Mock(return_value="075000000000000AAA")
    conn._bulk.get_all_results_for_query_batch = Mock(
        return_value=[IteratorBytesIO([json.dumps(retval).encode("utf-8")])]
    )

    results = list(conn.bulk_api_query("Account", "SELECT Id FROM Account", [], 5))

    conn._bulk.query.assert_called_once_with(
        "075000000000000AAA", "SELECT Id FROM Account"
    )
    self.assertEqual(
        conn._bulk.is_batch_done.call_args_list,
        [call(conn._bulk.query.return_value), call(conn._bulk.query.return_value)],
    )
    conn._bulk.get_all_results_for_query_batch.assert_called_once_with(
        conn._bulk.query.return_value
    )
    self.assertEqual(retval, results)
def _import_results(self, mapping, result_file, conn):
    # Map SF field names to local db column names
    sf_header = [
        name.strip('"')
        for name in result_file.readline().strip().decode("utf-8").split(",")
    ]
    columns = []
    lookup_keys = []
    for sf in sf_header:
        if sf == "Records not found for this query":
            return
        if sf:
            column = mapping.get("fields", {}).get(sf)
            if not column:
                lookup = mapping.get("lookups", {}).get(sf, {})
                if lookup:
                    lookup_keys.append(sf)
                    column = get_lookup_key_field(lookup, sf)
            if column:
                columns.append(column)
    if not columns:
        return
    record_type = mapping.get("record_type")
    if record_type:
        columns.append("record_type")
    processor = log_progress(
        process_incoming_rows(result_file, record_type), self.logger
    )
    data_file = IteratorBytesIO(processor)
    if mapping["oid_as_pk"]:
        self._sql_bulk_insert_from_csv(conn, mapping["table"], columns, data_file)
    else:
        # If using the autogenerated id field, split out the CSV file from the Bulk API
        # into two separate files and load into the main table and the sf_id_table
        with tempfile.TemporaryFile("w+b") as f_values:
            with tempfile.TemporaryFile("w+b") as f_ids:
                data_file_values, data_file_ids = self._split_batch_csv(
                    data_file, f_values, f_ids
                )
                self._sql_bulk_insert_from_csv(
                    conn, mapping["table"], columns, data_file_values
                )
                self._sql_bulk_insert_from_csv(
                    conn, mapping["sf_id_table"], ["sf_id"], data_file_ids
                )
    if "RecordTypeId" in mapping["fields"]:
        self._extract_record_types(
            mapping["sf_object"], mapping["record_type_table"], conn
        )
    self.session.commit()
    if lookup_keys and not mapping["oid_as_pk"]:
        self._convert_lookups_to_id(mapping, lookup_keys)
def sf21Query(lastHourDateTime, bulkNew=bulkNew):
    intLead = []
    intOppo = []

    batch = buildQueryBatch(bulkNew, sfConstants.eventSF21, 'event', lastHourDateTime, newToOld=True)
    data = []
    for result in bulkNew.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        for row in result:
            if not row['WhatId']:  # Closer Appt, query lead
                # intLead.append(row['NewSalesforceLeadId__c'])
                data.append(row)
            # if row['WhatId']:  # CAD Appt, query opportunity
            #     intOppo.append(row['WhatId'])
            #     data.append(row)
            # not syncing CAD meeting before we map the external id on Opportunity
            print(json.dumps(row, indent=4, sort_keys=True))
    writeTxt(logPath + 'Qinteraction21.txt', data)

    batch = buildQueryBatch(bulkNew, sfConstants.opportunitySF21, 'opportunity', lastHourDateTime, intOppo, newToOld=True)
    data = []
    for result in bulkNew.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        for row in result:
            data.append(row)
            print(json.dumps(row, indent=4, sort_keys=True))
    writeTxt(logPath + 'Qoppo21.txt', data)

    batch = buildQueryBatch(bulkNew, sfConstants.leadSF21, 'lead', lastHourDateTime, intLead, newToOld=True)
    while not bulkNew.is_batch_done(batch):
        time.sleep(10)
    data = []
    for result in bulkNew.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        for row in result:
            if (row['MobilePhone'] or row['Phone'] or row['Email']) or row['Id'] in intLead:
                data.append(row)
                print(json.dumps(row, indent=4, sort_keys=True))
    writeTxt(logPath + 'Qlead21.txt', data)
def checkLeads1(bulk=bulkOld):
    job = bulk.create_query_job("Lead", contentType='JSON')
    batch = bulk.query(job, "select Id, Name, CreatedDate from Lead where NewSalesforceExtId__c = null and IsConverted = false")
    bulk.close_job(job)
    while not bulk.is_batch_done(batch):
        time.sleep(10)
    msg = ''
    for result in bulk.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        leadCount = len(result)
        if leadCount > 0:
            msg = '<h1># of Leads in SF1 that do not have a NewSalesforceExtId: ' + str(leadCount) + '</h1>'
            for row in result:
                msg += '<p>' + row['Name'] + ' @ ' + secondToTime(int(row['CreatedDate'])) + ' <a href="' + oldSFURl + str(row['Id']) + '">SF1</a>'
    if msg:
        print(msg)
        errorEmail('Orphan leads', msg)
def _process_job_results(self, mapping, job_id, local_ids_for_batch):
    """Get the job results and process the results. If we're raising for
    row-level errors, do so; if we're inserting, store the new Ids."""
    if mapping["action"] == "insert":
        id_table_name = self._reset_id_table(mapping)
        conn = self.session.connection()

    for batch_id, local_ids in local_ids_for_batch.items():
        try:
            results_url = (
                f"{self.bulk.endpoint}/job/{job_id}/batch/{batch_id}/result"
            )
            # Download entire result file to a temporary file first
            # to avoid the server dropping connections
            with download_file(results_url, self.bulk) as f:
                self.logger.info(f"  Downloaded results for batch {batch_id}")
                results_generator = self._generate_results_id_map(f, local_ids)
                if mapping["action"] == "insert":
                    self._sql_bulk_insert_from_csv(
                        conn,
                        id_table_name,
                        ("id", "sf_id"),
                        IteratorBytesIO(results_generator),
                    )
                    self.logger.info(f"  Updated {id_table_name} for batch {batch_id}")
                else:
                    for r in results_generator:
                        pass  # Drain generator to validate results
        except BulkDataException:
            raise
        except Exception as e:
            raise BulkDataException(
                f"Failed to download results for batch {batch_id} ({str(e)})"
            )

    if mapping["action"] == "insert":
        self.session.commit()
def check1212(bulk=bulkOld, bulkDev=bulkNew):
    timeStamp = time.strftime("%d/%m/%Y")
    # Initialize up front so the final errorEmail call works even when a query
    # returns no result batches.
    msg = ''
    msgInteraction = ''

    job = bulk.create_query_job("Opportunity", contentType='JSON')
    batch = bulk.query(job, "select Id, Name, CreatedDate, LeadId__c from Opportunity where CreatedDate = today")
    bulk.close_job(job)
    while not bulk.is_batch_done(batch):
        time.sleep(10)
    oppoExt = []
    convertedOppo = {}
    for result in bulk.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        oppoCount = len(result)
        msg = '<h1># of Opportunities converted in SF1: ' + str(oppoCount) + '</h1>'
        for row in result:
            # print(json.dumps(row, indent=4, sort_keys=True))
            convertedOppo[row['LeadId__c']] = {'createdDate': row['CreatedDate'], 'oldSFId': row['Id'], 'oldLeadId': row['LeadId__c'], 'Name': row['Name']}
            oppoExt.append(row['LeadId__c'])

    job = bulk.create_query_job("Interaction__c", contentType='JSON')
    batch = bulk.query(job, "select Id, Name, CreatedDate, Subject__c, NewSalesforceExtID__c from Interaction__c where CreatedDate = today and (Subject__c = 'Closer Appointment' or Subject__c = 'CAD Appointment') AND CreatedById != '00539000005GkosAAC' order by CreatedDate")
    bulk.close_job(job)
    while not bulk.is_batch_done(batch):
        time.sleep(10)
    for result in bulk.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        oppoCount = len(result)
        msgInteraction = '<h1># of Interactions created in SF1: ' + str(oppoCount) + '</h1>'
        for row in result:
            msgInteraction += '<p>@' + secondToTime(int(row['CreatedDate'])) + ' <a href="' + oldSFURl + str(row['Id']) + '">SF1</a>'
            if row['NewSalesforceExtID__c']:
                msgInteraction += ' & <a href="' + newSFURl + str(row['NewSalesforceExtID__c']) + '">SF2</a></p>'
            else:
                msgInteraction += ' & NOT IN SF2</p>'
    print(json.dumps(convertedOppo, indent=4, sort_keys=True))

    if oppoExt:
        job = bulkDev.create_query_job("Opportunity", contentType='JSON')
        queryStr = "select Id, OldSalesforceExtID__c from Opportunity where OldSalesforceExtID__c in " + str(oppoExt).replace("u'", "'").replace('[', '(').replace(']', ')').replace(' ', '') + " order by CreatedDate"
        batch = bulkDev.query(job, queryStr)
        bulkDev.close_job(job)
        while not bulkDev.is_batch_done(batch):
            time.sleep(10)
        for result in bulkDev.get_all_results_for_query_batch(batch):
            result = json.load(IteratorBytesIO(result))
            for row in result:
                msg += '<p>' + convertedOppo[row['OldSalesforceExtID__c']]['Name'] + ' @ ' + secondToTime(int(convertedOppo[row['OldSalesforceExtID__c']]['createdDate'])) + '</p>'
                msg += '<p><a href="' + oldSFURl + str(convertedOppo[row['OldSalesforceExtID__c']]['oldSFId']) + '">SF1</a> & <a href="' + newSFURl + str(row['Id']) + '">SF2</a></p>'
                convertedOppo.pop(row['OldSalesforceExtID__c'], None)
                print(json.dumps(row, indent=4, sort_keys=True))
    for key in convertedOppo:
        print(convertedOppo[key])
        msg += '<p>' + convertedOppo[key]['Name'] + ' @ ' + secondToTime(convertedOppo[key]['createdDate']) + '</p>'
        msg += '<p><a href="' + oldSFURl + str(convertedOppo[key]['oldSFId']) + '">SF1</a> & NOT IN SF2</p>'

    job = bulkDev.create_query_job("Event", contentType='JSON')
    batch = bulkDev.query(job, "select Id, CreatedDate, Subject, OldSalesforceExtID__c from Event where CreatedDate = today and (Subject = 'Closer Appointment' or Subject = 'CAD Appointment') AND CreatedById != '005f4000000JdJ4AAK' order by CreatedDate")
    bulkDev.close_job(job)
    while not bulkDev.is_batch_done(batch):
        time.sleep(10)
    convertedOppo = {}
    for result in bulkDev.get_all_results_for_query_batch(batch):
        result = json.load(IteratorBytesIO(result))
        oppoCount = len(result)
        msgInteraction += '<h1># of Events created in SF2: ' + str(oppoCount) + '</h1>'
        for row in result:
            msgInteraction += '<p>@' + secondToTime(int(row['CreatedDate'])) + ' <a href="' + newSFURl + str(row['Id']) + '">SF2</a>'
            # Check the external id (not the always-present Id) to decide
            # whether this Event has a matching SF1 record.
            if row['OldSalesforceExtID__c']:
                msgInteraction += ' & <a href="' + oldSFURl + str(row['OldSalesforceExtID__c']) + '">SF1</a></p>'
            else:
                msgInteraction += ' & NOT IN SF1</p>'
            # print(json.dumps(row, indent=4, sort_keys=True))

    errorEmail(str(timeStamp) + ' - Today 1212', msg + msgInteraction)
def create_and_run_bulk_job(self, job_type, object_name, primary_key, data):
    """
    Note: If the specified job_type is Update or Upsert, you must provide the
    kwarg "primary_key" for Salesforce to identify records with.

    :param job_type: "Update", "Insert", or "Upsert".
    :type job_type: str
    :param object_name: The name of the SF Object you are performing a bulk job on.
        Ex. object_name = "Account"
    :type object_name: str
    :param primary_key: Field used to match existing records (the external id
        field name for an Upsert).
    :type primary_key: str
    :param data: Needs to be formatted as a list of dictionaries.
        Ex. data = [{'Id': 1, 'Name': 'Andrew'}, {'Id': 2, 'Name': 'Jerry'}]
    :type data: list
    """
    # Ensure string arguments have their first letter capitalized.
    job_type = str.title(job_type)
    object_name = str.title(object_name)

    # Create Job.
    self.log.info(f'Creating {object_name} {job_type} job.')
    if job_type not in ['Insert', 'Update', 'Upsert']:
        raise ReferenceError(
            'Invalid job_type specified. Please use "Insert", "Update", or "Upsert".'
        )
    try:
        if job_type == 'Insert':
            job = self.bulk.create_insert_job(object_name, contentType='CSV')
        elif job_type == 'Update':
            job = self.bulk.create_update_job(object_name, contentType='CSV')
            soql_query = f'select Id, {primary_key} from {object_name}'
            # The query results are parsed with json.load below, so request JSON.
            query_job = self.bulk.create_query_job(object_name, contentType='JSON')
            query_batch = self.bulk.query(query_job, soql_query)
            self.bulk.close_job(query_job)
            self.bulk.wait_for_batch(query_job, query_batch, timeout=60 * 10)
            query_results = list(self.bulk.get_all_results_for_query_batch(query_batch))
            if len(query_results) == 1:
                id_map = json.load(IteratorBytesIO(query_results[0]))
                for rec in data:
                    for row in id_map:
                        if primary_key not in row:
                            # Resolve dotted relationship fields, e.g. "Owner.Email".
                            key_split = primary_key.split('.')
                            row[primary_key] = row[key_split[0]][key_split[1]]
                        if rec[primary_key] == row[primary_key]:
                            rec['Id'] = row['Id']
                            break
            else:
                raise OverflowError('Query results larger than expected. Please review.')
        elif job_type == 'Upsert':
            job = self.bulk.create_upsert_job(object_name, external_id_name=primary_key, contentType='CSV')
    except Exception as job_creation_error:
        self.log.info(
            f'Unable to create {object_name} {job_type} job. Please verify the value of the object_name variable.'
        )
        self.log.exception(f'Encountered exception when creating job: {job_creation_error}')
        raise

    # Transform data from a list of dictionaries into an iterable CSV content type,
    # since the salesforce_bulk package provides a sweet class for it.
    csv_iter = CsvDictsAdapter(iter(data))
    # Create a batch with the data and add it to the job.
    batch = self.bulk.post_batch(job, csv_iter)
    # Wait for the batch to complete. Default timeout is 10 minutes.
    self.bulk.wait_for_batch(job, batch, timeout=60 * 10)
    # Once the batch has been completed, get the results.
    results = self.bulk.get_batch_results(batch)
    # Close the job.
    self.bulk.close_job(job)
    self.log.info(f'{job_type} {object_name} job has been successfully completed.')
    return results
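# A sketch of how create_and_run_bulk_job might be called for an upsert keyed
# on a custom external id field. The wrapper instance `sf_client` and the
# field name ExternalId__c are assumptions for illustration:
results = sf_client.create_and_run_bulk_job(
    job_type='Upsert',
    object_name='Account',
    primary_key='ExternalId__c',
    data=[
        {'ExternalId__c': 'A-001', 'Name': 'Acme'},
        {'ExternalId__c': 'A-002', 'Name': 'Globex'},
    ],
)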
def batch_query_records_dict(self, db_table, soql_query, concurrency='Serial'):
    """Execute a bulk Salesforce soql query and yield results one chunk at a time,
    as lists of record dictionaries. Works only for PK-chunking-enabled SF tables;
    allows millions of records to be read.

    :param db_table: Database table name
    :param soql_query: Soql query
    :return: Generator of lists of result record dictionaries
    """
    self.bulk = SalesforceBulk(sessionId=self.session_id, host=self.instance)
    job = self.bulk.create_query_job(db_table, contentType="JSON", pk_chunking=True, concurrency=concurrency)
    try:
        batch = self.bulk.query(job, soql_query)
        batch_list = self.bulk.get_batch_list(job)
        print('first batch', batch_list[0])
        batch_id = batch_list[0]['id']
        job_id = batch_list[0]['jobId']
        state = batch_list[0]['state']
        while state == 'Queued' or state == 'InProgress':
            print("Waiting for batch state Queued or InProgress to change " + state)
            sleep(10)
            state = self.bulk.batch_state(batch_id, job_id)
        batch_list = self.bulk.get_batch_list(job)
        print(f'number of batches: {len(batch_list)}')
        for item in batch_list:
            print('item', item)
            batch_id = item['id']
            job_id = item['jobId']
            state = item['state']
            if state == 'NotProcessed':
                continue
            while not self.bulk.is_batch_done(batch_id, job_id):
                print(f"Waiting for batch query to complete batch_id:{batch_id}, job_id: {job_id}, state: {state}")
                sleep(10)
                state = self.bulk.batch_state(batch_id, job_id)
            total_retry_count = len(batch_list)
            retry = len(batch_list)
            lastIndex = 0
            while retry > 0:
                print(f'retry {retry} times left')
                try:
                    for result in list(self.bulk.get_all_results_for_query_batch(batch_id, job_id))[lastIndex:]:
                        result = json.load(IteratorBytesIO(result))
                        lastIndex += 1
                        yield result
                    break
                except requests.exceptions.ChunkedEncodingError:
                    print('Chunking failed')
                    retry -= 1
                    self.connect()
                    self.bulk = SalesforceBulk(sessionId=self.session_id, host=self.instance)
                except Exception:
                    print('There was an error')
                    traceback.print_exc()
                    retry -= 1
                    self.connect()
                    self.bulk = SalesforceBulk(sessionId=self.session_id, host=self.instance)
            if retry <= 0:
                raise Exception(f'Retried {total_retry_count} times and it still failed')
    except BulkApiError as e:
        self.bulk.abort_job(job)
        raise e
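# batch_query_records_dict is a generator that yields one list of record dicts
# per PK-chunked result set, so results stream without loading everything into
# memory at once. A consumption sketch (the `client` instance is an assumption,
# as in the get_query_records_dict example above):
total = 0
for chunk in client.batch_query_records_dict(
    "Contact", "select Id, Email from Contact"
):
    total += len(chunk)
print("records fetched:", total)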