def test_batch__character_limit(self):
    """Batches split early when the serialized CSV would exceed char_limit."""
    context = mock.Mock()

    step = BulkApiDmlOperation(
        sobject="Contact",
        operation=DataOperationType.INSERT,
        api_options={"batch_size": 2},
        context=context,
        fields=["LastName"],
    )

    records = [["Test"], ["Test2"], ["Test3"]]

    # Compute the total serialized size (header plus every row), then
    # subtract one byte so a single batch cannot hold all three records.
    serialized_rows = [step._serialize_csv_record(step.fields)]
    serialized_rows.extend(step._serialize_csv_record(rec) for rec in records)
    char_limit = sum(len(row) for row in serialized_rows) - 1

    # Ask for batches of three, but we should get batches of 2 back.
    batches = list(step._batch(iter(records), n=3, char_limit=char_limit))

    assert len(batches) == 2
    assert list(batches[0]) == [b"LastName\r\n", b"Test\r\n", b"Test2\r\n"]
    assert list(batches[1]) == [b"LastName\r\n", b"Test3\r\n"]
def test_end_to_end(self, download_mock):
    """Full insert flow: start, load, end, then stream back per-row results."""
    context = mock.Mock()
    context.bulk.endpoint = "https://test"
    context.bulk.create_job.return_value = "JOB"
    context.bulk.post_batch.side_effect = ["BATCH1", "BATCH2"]
    download_mock.return_value = io.StringIO(
        """id,success,created,error
003000000000001,true,true,
003000000000002,true,true,
003000000000003,false,false,error"""
    )

    step = BulkApiDmlOperation(
        sobject="Contact",
        operation=DataOperationType.INSERT,
        api_options={},
        context=context,
        fields=["LastName"],
    )
    step._wait_for_job = mock.Mock(
        return_value=DataOperationJobResult(DataOperationStatus.SUCCESS, [], 0, 0)
    )

    step.start()
    step.load_records(iter([["Test"], ["Test2"], ["Test3"]]))
    step.end()

    assert step.job_result.status is DataOperationStatus.SUCCESS
    assert list(step.get_results()) == [
        DataOperationResult("003000000000001", True, None),
        DataOperationResult("003000000000002", True, None),
        DataOperationResult(None, False, "error"),
    ]
def test_get_results(self, download_mock):
    """Results from multiple batches are downloaded and concatenated in order."""
    context = mock.Mock()
    context.bulk.endpoint = "https://test"
    download_mock.side_effect = [
        io.StringIO(
            """id,success,created,error
003000000000001,true,true,
003000000000002,true,true,"""
        ),
        io.StringIO(
            """id,success,created,error
003000000000003,false,false,error"""
        ),
    ]
    step = BulkApiDmlOperation(
        sobject="Contact",
        operation=DataOperationType.INSERT,
        api_options={},
        context=context,
        fields=["LastName"],
    )
    step.job_id = "JOB"
    step.batch_ids = ["BATCH1", "BATCH2"]

    assert list(step.get_results()) == [
        DataOperationResult("003000000000001", True, None),
        DataOperationResult("003000000000002", True, None),
        DataOperationResult(None, False, "error"),
    ]
    download_mock.assert_has_calls(
        [
            mock.call("https://test/job/JOB/batch/BATCH1/result", context.bulk),
            mock.call("https://test/job/JOB/batch/BATCH2/result", context.bulk),
        ]
    )
def test_contextmanager(self):
    """Entering the step creates the job; exiting closes it and awaits completion."""
    context = mock.Mock()
    context.bulk.create_job.return_value = "JOB"
    step = BulkApiDmlOperation(
        sobject="Contact",
        operation=DataOperationType.INSERT,
        api_options={},
        context=context,
        fields=["LastName"],
    )
    step._wait_for_job = mock.Mock(
        return_value=DataOperationJobResult(DataOperationStatus.SUCCESS, [], 0, 0)
    )
    step.job_id = "JOB"

    with step:
        pass

    context.bulk.create_job.assert_called_once_with(
        "Contact", "insert", contentType="CSV", concurrency="Parallel"
    )
    assert step.job_id == "JOB"
    context.bulk.close_job.assert_called_once_with("JOB")
    step._wait_for_job.assert_called_once_with("JOB")
    assert step.job_result.status is DataOperationStatus.SUCCESS
def _execute_step(
    self, mapping: MappingStep
) -> Union[DataOperationJobResult, MagicMock]:
    """Load data for a single step."""
    # Record types referenced by this object must be loaded locally first.
    if mapping.get("fields", {}).get("RecordTypeId"):
        connection = self.session.connection()
        self._load_record_types([mapping["sf_object"]], connection)
        self.session.commit()

    # An explicit Id field means the object id is being used as primary key.
    mapping["oid_as_pk"] = bool(mapping.get("fields", {}).get("Id"))

    if mapping.get("action") == "insert":
        operation = DataOperationType.INSERT
    else:
        operation = DataOperationType.UPDATE

    # Step-level bulk_mode wins over the task-level setting; default Parallel.
    bulk_mode = mapping.get("bulk_mode") or self.bulk_mode or "Parallel"

    step = BulkApiDmlOperation(
        sobject=mapping["sf_object"],
        operation=operation,
        api_options={"bulk_mode": bulk_mode},
        context=self,
        fields=self._get_columns(mapping),
    )

    local_ids = []
    step.start()
    step.load_records(self._stream_queried_data(mapping, local_ids))
    step.end()

    # Row-level failures still get their results recorded; only a job-level
    # failure skips result processing entirely.
    if step.job_result.status is not DataOperationStatus.JOB_FAILURE:
        self._process_job_results(mapping, step, local_ids)

    return step.job_result
def test_get_results__failure(self, download_mock):
    """A failure while downloading batch results is wrapped in BulkDataException."""
    context = mock.Mock()
    context.bulk.endpoint = "https://test"
    # Make the download call itself raise. The previous version set
    # `download_mock.return_value.side_effect`, which configures calls on the
    # *returned* mock rather than the download function, so the simulated
    # download never actually failed as intended.
    download_mock.side_effect = Exception
    step = BulkApiDmlOperation(
        sobject="Contact",
        operation=DataOperationType.INSERT,
        api_options={},
        context=context,
        fields=["LastName"],
    )
    step.job_id = "JOB"
    step.batch_ids = ["BATCH1", "BATCH2"]

    with self.assertRaises(BulkDataException):
        list(step.get_results())
def _run_task(self):
    """Query and bulk-delete records for each configured sObject.

    Raises BulkDataException if the query or delete job fails at the job
    level; row-level delete errors are routed through RowErrorChecker.
    """
    self._validate_and_inject_namespace()

    for obj in self.sobjects:
        query = f"SELECT Id FROM {obj}"
        if self.options["where"]:
            query += f" WHERE {self.options['where']}"

        self.logger.info(f"Querying for {obj} objects")
        qs = BulkApiQueryOperation(
            sobject=obj, api_options={}, context=self, query=query
        )
        qs.query()
        if qs.job_result.status is not DataOperationStatus.SUCCESS:
            raise BulkDataException(
                f"Unable to query records for {obj}: {','.join(qs.job_result.job_errors)}"
            )
        if not qs.job_result.records_processed:
            self.logger.info("No records found, skipping delete operation")
            continue

        self.logger.info(f"Deleting {self._object_description(obj)} ")
        ds = BulkApiDmlOperation(
            sobject=obj,
            operation=(
                DataOperationType.HARD_DELETE
                if self.options["hardDelete"]
                else DataOperationType.DELETE
            ),
            api_options={},
            context=self,
            fields=["Id"],
        )
        ds.start()
        ds.load_records(qs.get_results())
        ds.end()

        # ROW_FAILURE is tolerated here; per-row errors are checked below.
        if ds.job_result.status not in [
            DataOperationStatus.SUCCESS,
            DataOperationStatus.ROW_FAILURE,
        ]:
            # Bug fix: report the delete job's errors (ds), not the
            # already-successful query job's (qs).
            raise BulkDataException(
                f"Unable to delete records for {obj}: {','.join(ds.job_result.job_errors)}"
            )

        error_checker = RowErrorChecker(
            self.logger, self.options["ignore_row_errors"], self.row_warning_limit
        )
        for result in ds.get_results():
            error_checker.check_for_row_error(result, result.id)
def test_start(self):
    """start() creates a CSV insert job and records the returned job id."""
    context = mock.Mock()
    context.bulk.create_job.return_value = "JOB"
    step = BulkApiDmlOperation(
        sobject="Contact",
        operation=DataOperationType.INSERT,
        api_options={},
        context=context,
        fields=["LastName"],
    )

    step.start()

    context.bulk.create_job.assert_called_once_with(
        "Contact", "insert", contentType="CSV", concurrency="Parallel"
    )
    assert step.job_id == "JOB"
def test_serialize_csv_record(self):
    """Records serialize as CRLF-terminated CSV bytes, quoting embedded newlines."""
    context = mock.Mock()
    step = BulkApiDmlOperation(
        sobject="Contact",
        operation=DataOperationType.INSERT,
        api_options={"batch_size": 2},
        context=context,
        fields=["Id", "FirstName", "LastName"],
    )

    # The header row is serialized from the step's field list.
    assert step._serialize_csv_record(step.fields) == b"Id,FirstName,LastName\r\n"

    # An ordinary data row.
    assert step._serialize_csv_record(["1", "Bob", "Ross"]) == b"1,Bob,Ross\r\n"

    # Values containing newlines must come back quoted.
    assert (
        step._serialize_csv_record(["col1", "multiline\ncol2"])
        == b'col1,"multiline\ncol2"\r\n'
    )
def test_batch(self):
    """_batch yields n-record chunks, each prefixed with the CSV header row."""
    context = mock.Mock()
    step = BulkApiDmlOperation(
        sobject="Contact",
        operation=DataOperationType.INSERT,
        api_options={"batch_size": 2},
        context=context,
        fields=["LastName"],
    )

    batches = list(step._batch(iter([["Test"], ["Test2"], ["Test3"]]), n=2))

    assert len(batches) == 2
    assert list(batches[0]) == [b"LastName\r\n", b"Test\r\n", b"Test2\r\n"]
    assert list(batches[1]) == [b"LastName\r\n", b"Test3\r\n"]
def test_end__failed(self):
    """end() closes the job and surfaces a JOB_FAILURE result from _wait_for_job."""
    context = mock.Mock()
    context.bulk.create_job.return_value = "JOB"
    step = BulkApiDmlOperation(
        sobject="Contact",
        operation=DataOperationType.INSERT,
        api_options={},
        context=context,
        fields=["LastName"],
    )
    step._wait_for_job = mock.Mock(
        return_value=DataOperationJobResult(DataOperationStatus.JOB_FAILURE, [], 0, 0)
    )
    step.job_id = "JOB"

    step.end()

    context.bulk.close_job.assert_called_once_with("JOB")
    step._wait_for_job.assert_called_once_with("JOB")
    assert step.job_result.status is DataOperationStatus.JOB_FAILURE