def test_object_names_bucket_query(sdc_builder, sdc_executor, couchbase, test_name, bucket_name):
    """Exercise an N1QL lookup where the (parametrized) bucket name itself is used as the
    queried keyspace and as the N1QL result property mapped into the record.

    Pipeline: Dev Raw Data Source >> Couchbase Lookup >> wiretap.
    """
    document_key_field = 'mydocname'
    key = 'mydocid'
    doc = {"data": "hello", document_key_field: key}
    raw_dict = dict(id=key)
    raw_data = json.dumps(raw_dict)
    # The EL expression is concatenated as a plain string so the f-string does not try
    # to interpolate ${record:value("/id")}.
    query = f'SELECT * FROM `{bucket_name}` WHERE {document_key_field}=' + '"${record:value("/id")}"'
    cluster = couchbase.cluster
    try:
        # populate the database
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name, bucket_type='couchbase', ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)
        bucket = cluster.bucket(bucket_name)
        bucket.upsert(key, doc)
        # Ad-hoc N1QL queries require a primary index on the bucket.
        cluster.query(f'CREATE PRIMARY INDEX ON `{bucket_name}`').execute()
        # build the pipeline
        builder = sdc_builder.get_pipeline_builder()
        origin = builder.add_stage('Dev Raw Data Source')
        origin.set_attributes(data_format='JSON', stop_after_first_batch=True, raw_data=raw_data)
        lookup = builder.add_stage('Couchbase Lookup')
        lookup.set_attributes(
            authentication_mode='USER', bucket=bucket_name, lookup_type='N1QL',
            n1ql_query=query,
            # SELECT * nests the row under the bucket name, hence property=bucket_name.
            n1ql_mappings=[dict(property=bucket_name, sdcField='/output')],
            missing_value_behavior='ERROR')
        wiretap = builder.add_wiretap()
        origin >> lookup >> wiretap.destination
        pipeline = builder.build().configure_for_environment(couchbase)
        sdc_executor.add_pipeline(pipeline)
        sdc_executor.start_pipeline(pipeline).wait_for_finished()
        output_records = wiretap.output_records
        assert len(
            output_records
        ) == 1, 'Number of returned records should equal the number of matching records stored'
        assert output_records[0].field['output'] == doc
    finally:
        # Best-effort cleanup: never mask the test's own failure with a teardown error.
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_lookup_kv(sdc_builder, sdc_executor, couchbase, test_name, input, expected_out, expected_error, missing_value_behavior):
    """Run a KV-mode Couchbase Lookup against a single stored document and check how
    hits and misses are routed to output vs. error records.

    Pipeline: Dev Raw Data Source >> Couchbase Lookup >> wiretap.
    """
    bucket_name = get_random_string(string.ascii_letters, 10).lower()
    stored_doc = {'id': 'id1', 'data': 'hello'}
    raw_data = json.dumps(dict(id=input))
    cluster = couchbase.cluster
    try:
        # Stage the document the lookup will (or will not) find.
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name, bucket_type='couchbase', ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)
        cluster.bucket(bucket_name).upsert(stored_doc['id'], stored_doc)

        # Assemble the pipeline.
        pipeline_builder = sdc_builder.get_pipeline_builder()
        source_stage = pipeline_builder.add_stage('Dev Raw Data Source')
        source_stage.set_attributes(data_format='JSON',
                                    stop_after_first_batch=True,
                                    raw_data=raw_data)
        lookup_stage = pipeline_builder.add_stage('Couchbase Lookup')
        lookup_stage.set_attributes(authentication_mode='USER',
                                    bucket=bucket_name,
                                    lookup_type='KV',
                                    document_key='${record:value("/id")}',
                                    sdc_field='/output',
                                    missing_value_behavior=missing_value_behavior)
        wiretap = pipeline_builder.add_wiretap()
        source_stage >> lookup_stage >> wiretap.destination
        pipeline = pipeline_builder.build().configure_for_environment(couchbase)
        sdc_executor.add_pipeline(pipeline)
        sdc_executor.start_pipeline(pipeline).wait_for_finished()

        good_records = wiretap.output_records
        bad_records = wiretap.error_records
        assert len(good_records) == len(expected_out)
        assert len(bad_records) == len(expected_error)
        if expected_out:
            assert good_records[0].field == expected_out[0]
        if expected_error:
            assert bad_records[0].field == expected_error[0]
    finally:
        # Best-effort cleanup; log rather than mask the original failure.
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_data_format_binary(sdc_builder, sdc_executor, couchbase):
    """Write a generated BYTE_ARRAY field as a BINARY document and verify the stored
    bytes match what the wiretap observed entering the destination.

    Pipeline: Dev Data Generator >> Couchbase destination, with a wiretap on the
    generator's event/record copy (source >= wiretap.destination).
    """
    bucket_name = get_random_string(string.ascii_letters, 10)
    document_key = 'id'
    batch_size = 1
    cluster = couchbase.cluster
    # Build the pipeline
    builder = sdc_builder.get_pipeline_builder()
    source = builder.add_stage('Dev Data Generator').set_attributes(
        batch_size=batch_size,
        fields_to_generate=[{
            "type": "BYTE_ARRAY",
            "field": "data"
        }])
    destination = builder.add_stage('Couchbase', type='destination')
    destination.set_attributes(authentication_mode='USER',
                               bucket=bucket_name,
                               document_key=document_key,
                               data_format='BINARY',
                               binary_field_path="/data")
    wiretap = builder.add_wiretap()
    source >> destination
    source >= wiretap.destination
    pipeline = builder.build().configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)
    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name, bucket_type='couchbase', ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)
        # The generator runs until explicitly stopped; wait until at least one batch is out.
        sdc_executor.start_pipeline(
            pipeline).wait_for_pipeline_output_records_count(batch_size)
        sdc_executor.stop_pipeline(pipeline)
        history = sdc_executor.get_pipeline_history(pipeline)
        num_records = history.latest.metrics.counter(
            'pipeline.batchInputRecords.counter').count
        logger.info(f"Wrote {num_records} records")
        assert num_records == len(wiretap.output_records)
        bucket = cluster.bucket(bucket_name)
        # NOTE(review): only the first record's document is checked; every generated
        # record reuses the same fixed document key, so later writes overwrite earlier
        # ones — confirm this is intended when num_records > 1.
        assert bucket.get(
            document_key).value == wiretap.output_records[0].field['data']
    finally:
        # Best-effort cleanup: never mask the test's own failure with a teardown error.
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_data_types_query(sdc_builder, sdc_executor, couchbase, input, test_name, expected_type, expected_value):
    """Store a document whose 'data' field holds a parametrized value, look it up via
    N1QL, and assert both the SDC field type and value of the lookup result.

    Pipeline: Dev Raw Data Source >> Couchbase Lookup >> wiretap.
    """
    if input is None:
        pytest.skip('Lookup Processor queries deal with null values as errors rather than treating them as values')
    bucket_name = get_random_string(string.ascii_letters, 10).lower()
    key = 'mydocid'
    doc = {'id': key, 'data': input}
    raw_dict = dict(id=key)
    raw_data = json.dumps(raw_dict)
    # EL part concatenated separately so the f-string does not expand ${record:value("/id")}.
    query = f'SELECT data FROM {bucket_name} WHERE ' + 'id="${record:value("/id")}"'
    cluster = couchbase.cluster
    try:
        # populate the database
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(CreateBucketSettings(name=bucket_name,
                                                                    bucket_type='couchbase',
                                                                    ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)
        bucket = cluster.bucket(bucket_name)
        bucket.upsert(key, doc)
        # Ad-hoc N1QL queries require a primary index on the bucket.
        cluster.query(f'CREATE PRIMARY INDEX ON `{bucket_name}`').execute()
        # build the pipeline
        builder = sdc_builder.get_pipeline_builder()
        origin = builder.add_stage('Dev Raw Data Source')
        origin.set_attributes(data_format='JSON', stop_after_first_batch=True, raw_data=raw_data)
        lookup = builder.add_stage('Couchbase Lookup')
        lookup.set_attributes(authentication_mode='USER', bucket=bucket_name,
                              lookup_type='N1QL', n1ql_query=query,
                              n1ql_mappings=[dict(property='data', sdcField='/output')],
                              missing_value_behavior='ERROR')
        wiretap = builder.add_wiretap()
        origin >> lookup >> wiretap.destination
        pipeline = builder.build().configure_for_environment(couchbase)
        sdc_executor.add_pipeline(pipeline)
        sdc_executor.start_pipeline(pipeline).wait_for_finished()
        output_records = wiretap.output_records
        assert len(output_records) == 1
        # Check the SDC-side type of the looked-up value, then its value.
        assert output_records[0].field['output'].type == expected_type
        assert output_records[0].field['output'] == expected_value
    finally:
        # Best-effort cleanup: never mask the test's own failure with a teardown error.
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_couchbase_destination(sdc_builder, sdc_executor, couchbase):
    """
    Send simple JSON text into Couchbase destination from Dev Raw Data Source and assert Couchbase has received it.
    The pipeline looks like:
        dev_raw_data_source >> couchbase_destination

    Handles both the pre-3.9.0 destination configuration (VERSION5 database settings,
    explicit URL/credentials) and the newer USER-authentication configuration.
    Uses the legacy `couchbase.admin` / `open_bucket` helper API for bucket management.
    """
    couchbase_host = f'{couchbase.hostname}:{couchbase.port}'
    bucket_name = get_random_string(string.ascii_letters, 10)
    document_key_field = 'mydocname'
    raw_dict = dict(f1='abc', f2='xyz', f3='lmn')
    raw_dict[document_key_field] = 'mydocid'
    raw_data = json.dumps(raw_dict)
    builder = sdc_builder.get_pipeline_builder()
    dev_raw_data_source = builder.add_stage('Dev Raw Data Source')
    dev_raw_data_source.set_attributes(data_format='JSON', raw_data=raw_data, stop_after_first_batch=True)
    couchbase_destination = builder.add_stage('Couchbase', type='destination')
    if Version(sdc_builder.version) < Version('3.9.0'):
        # Older SDC versions expose a different attribute set for the Couchbase destination.
        couchbase_destination.set_attributes(
            database_version='VERSION5',
            unique_document_key_field=document_key_field,
            bucket=bucket_name,
            couchbase_user_name=couchbase.username,
            couchbase_user_password=couchbase.password,
            url=couchbase_host)
    else:
        couchbase_destination.set_attributes(authentication_mode='USER',
                                             document_key="${record:value('/" + document_key_field + "')}",
                                             bucket=bucket_name,
                                             user_name=couchbase.username,
                                             password=couchbase.password,
                                             node_list=couchbase_host)
    dev_raw_data_source >> couchbase_destination
    pipeline = builder.build(title='Couchbase Destination pipeline'
                             ).configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)
    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.admin.bucket_create(name=bucket_name, bucket_type='couchbase', ram_quota=256)
        couchbase.wait_for_healthy_bucket(bucket_name)
        sdc_executor.start_pipeline(pipeline).wait_for_finished()
        bucket = couchbase.cluster.open_bucket(bucket_name)
        doc_value = bucket.get(raw_dict[document_key_field]).value
        assert doc_value == raw_dict
    finally:
        # NOTE(review): unlike sibling tests, the drop is not wrapped in try/except,
        # so a teardown failure here will mask the test's own failure — confirm intended.
        logger.info('Deleting %s Couchbase bucket ...', bucket_name)
        couchbase.admin.bucket_delete(bucket_name)
def test_data_format_protobuf(sdc_builder, sdc_executor, couchbase):
    """Write one JSON record as a PROTOBUF document and verify the serialized bytes
    stored in Couchbase match the expected wire encoding.

    Pipeline: Dev Raw Data Source >> Couchbase destination.
    """
    bucket_name = get_random_string(string.ascii_letters, 10)
    document_key = 'id'
    raw_data = '{"first_name": "Martin","last_name": "Balzamo"}'
    # Expected protobuf wire bytes for the 'Contact' message built from raw_data.
    expected = '\x11\x06Martin\x12\x07Balzamo'
    cluster = couchbase.cluster
    # Build the pipeline
    builder = sdc_builder.get_pipeline_builder()
    source = builder.add_stage('Dev Raw Data Source').set_attributes(
        data_format='JSON', raw_data=raw_data, stop_after_first_batch=True)
    destination = builder.add_stage('Couchbase', type='destination')
    destination.set_attributes(authentication_mode='USER',
                               bucket=bucket_name,
                               document_key=document_key,
                               data_format='PROTOBUF',
                               message_type='Contact',
                               protobuf_descriptor_file=PROTOBUF_FILE_PATH)
    source >> destination
    pipeline = builder.build().configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)
    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name, bucket_type='couchbase', ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)
        sdc_executor.start_pipeline(pipeline).wait_for_finished()
        history = sdc_executor.get_pipeline_history(pipeline)
        num_records = history.latest.metrics.counter(
            'pipeline.batchOutputRecords.counter').count
        logger.info(f"Wrote {num_records} records")
        assert num_records == 1, 'Number of records stored should equal the number of records that entered the pipeline'
        bucket = cluster.bucket(bucket_name)
        doc_value = bucket.get(document_key).value
        # Decode the bytes object returned by Couchbase and remove any record separators (newline characters)
        contents = doc_value.decode('ascii').replace('\n', '')
        assert contents == expected
    finally:
        # Stop the pipeline if it is still running (e.g. wait_for_finished raised),
        # then drop the bucket best-effort.
        if pipeline and sdc_executor.get_pipeline_status(
                pipeline).response.json().get('status') == 'RUNNING':
            sdc_executor.stop_pipeline(pipeline)
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_multiple_batches(sdc_builder, sdc_executor, couchbase, batch_size):
    """Run the destination across several batches of generated records and verify every
    record landed in Couchbase under its sequence-number key.

    Pipeline: Dev Data Generator >> Couchbase destination, with a wiretap copy.
    """
    bucket_name = get_random_string(string.ascii_letters, 10)
    batches = 3
    cluster = couchbase.cluster

    # Build the pipeline.
    pipeline_builder = sdc_builder.get_pipeline_builder()
    generator = pipeline_builder.add_stage('Dev Data Generator')
    generator.set_attributes(batch_size=batch_size,
                             fields_to_generate=[{"type": "LONG_SEQUENCE", "field": "seq"}])
    couchbase_dest = pipeline_builder.add_stage('Couchbase', type='destination')
    couchbase_dest.set_attributes(authentication_mode='USER',
                                  bucket=bucket_name,
                                  document_key='${record:value("/seq")}')
    wiretap = pipeline_builder.add_wiretap()
    generator >> couchbase_dest
    generator >= wiretap.destination
    pipeline = pipeline_builder.build().configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)
    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name, bucket_type='couchbase', ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)

        # The generator never stops on its own; wait for enough output, then stop it.
        runner = sdc_executor.start_pipeline(pipeline)
        runner.wait_for_pipeline_output_records_count(batches * batch_size)
        sdc_executor.stop_pipeline(pipeline)

        history = sdc_executor.get_pipeline_history(pipeline)
        num_records = history.latest.metrics.counter('pipeline.batchInputRecords.counter').count
        logger.info(f"Wrote {num_records} records")
        assert num_records == len(wiretap.output_records)

        bucket = cluster.bucket(bucket_name)
        # Each record was keyed by its /seq value, so document "i" must equal record i.
        for seq, record in enumerate(wiretap.output_records):
            assert bucket.get(str(seq)).value == record.field
    finally:
        # Best-effort cleanup; log rather than mask the original failure.
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_object_names_bucket(sdc_builder, sdc_executor, couchbase, test_name, bucket_generator):
    """Test using different kinds of bucket names, asserting whether Couchbase receives the data.

    Pipeline: Dev Raw Data Source >> Couchbase destination. The bucket name comes from
    the parametrized `bucket_generator`, exercising special characters (including '%').
    """
    bucket_name = bucket_generator()
    document_key_field = 'mydocname'
    raw_dict = dict(f1='abc', f2='xyz', f3='lmn')
    raw_dict[document_key_field] = 'mydocid'
    raw_data = json.dumps(raw_dict)
    cluster = couchbase.cluster
    # Build the pipeline
    builder = sdc_builder.get_pipeline_builder()
    source = builder.add_stage('Dev Raw Data Source').set_attributes(
        data_format='JSON', raw_data=raw_data, stop_after_first_batch=True)
    destination = builder.add_stage('Couchbase', type='destination')
    destination.set_attributes(authentication_mode='USER',
                               bucket=bucket_name,
                               document_key="${record:value('/" + document_key_field + "')}")
    source >> destination
    pipeline = builder.build().configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)
    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name, bucket_type='couchbase', ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name.replace('%', '%25'))
        sdc_executor.start_pipeline(pipeline).wait_for_finished()
        # if the bucket name contains a percent sign, it gets interpreted as an escape character in the HTTP call made
        # by the open_bucket() method, so the ascii for percent (i.e. %25) should be used instead
        bucket = cluster.bucket(bucket_name.replace('%', '%25'))
        doc_value = bucket.get(raw_dict[document_key_field]).value
        assert doc_value == raw_dict
    finally:
        # Stop the pipeline if still running, then drop the bucket best-effort.
        if pipeline and sdc_executor.get_pipeline_status(
                pipeline).response.json().get('status') == 'RUNNING':
            sdc_executor.stop_pipeline(pipeline)
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_data_format_json(sdc_builder, sdc_executor, couchbase):
    """Write a simple JSON record to the Couchbase destination and verify the stored
    document round-trips unchanged.

    Pipeline: Dev Raw Data Source >> Couchbase destination.
    """
    bucket_name = get_random_string(string.ascii_letters, 10)
    document_key_field = 'mydocname'
    expected_doc = {'f1': 'abc', 'f2': 'xyz', 'f3': 'lmn', document_key_field: 'mydocid'}
    cluster = couchbase.cluster

    # Build the pipeline.
    pipeline_builder = sdc_builder.get_pipeline_builder()
    raw_source = pipeline_builder.add_stage('Dev Raw Data Source')
    raw_source.set_attributes(data_format='JSON',
                              raw_data=json.dumps(expected_doc),
                              stop_after_first_batch=True)
    couchbase_dest = pipeline_builder.add_stage('Couchbase', type='destination')
    couchbase_dest.set_attributes(authentication_mode='USER',
                                  bucket=bucket_name,
                                  document_key=f"${{record:value('/{document_key_field}')}}",
                                  data_format='JSON')
    raw_source >> couchbase_dest
    pipeline = pipeline_builder.build().configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)
    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name, bucket_type='couchbase', ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)
        sdc_executor.start_pipeline(pipeline).wait_for_finished()

        stored = cluster.bucket(bucket_name).get(expected_doc[document_key_field]).value
        assert stored == expected_doc
    finally:
        # Stop the pipeline if still running, then drop the bucket best-effort.
        if pipeline and sdc_executor.get_pipeline_status(
                pipeline).response.json().get('status') == 'RUNNING':
            sdc_executor.stop_pipeline(pipeline)
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_couchbase_destination(sdc_builder, sdc_executor, couchbase):
    """
    Send simple JSON text into Couchbase destination from Dev Raw Data Source and assert Couchbase has received it.
    The pipeline looks like:
        dev_raw_data_source >> couchbase_destination

    NOTE(review): this module appears to define `test_couchbase_destination` more than
    once; if these really live in one file, pytest only collects the last definition —
    confirm whether the earlier variants should be renamed or removed.
    """
    bucket_name = get_random_string(string.ascii_letters, 10)
    document_key_field = 'mydocname'
    raw_dict = dict(f1='abc', f2='xyz', f3='lmn')
    raw_dict[document_key_field] = 'mydocid'
    raw_data = json.dumps(raw_dict)
    cluster = couchbase.cluster
    builder = sdc_builder.get_pipeline_builder()
    dev_raw_data_source = builder.add_stage('Dev Raw Data Source')
    dev_raw_data_source.set_attributes(data_format='JSON', raw_data=raw_data, stop_after_first_batch=True)
    couchbase_destination = builder.add_stage('Couchbase', type='destination')
    couchbase_destination.set_attributes(authentication_mode='USER',
                                         document_key="${record:value('/" + document_key_field + "')}",
                                         bucket=bucket_name)
    dev_raw_data_source >> couchbase_destination
    pipeline = builder.build(title='Couchbase Destination pipeline'
                             ).configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)
    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name, bucket_type='couchbase', ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)
        sdc_executor.start_pipeline(pipeline).wait_for_finished()
        bucket = cluster.bucket(bucket_name)
        doc_value = bucket.get(raw_dict[document_key_field]).value
        assert doc_value == raw_dict
    finally:
        # NOTE(review): drop is not wrapped in try/except here, unlike sibling tests;
        # a teardown failure will mask the test's own failure — confirm intended.
        logger.info('Deleting %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.drop_bucket(bucket_name)
def test_couchbase_destination(sdc_builder, sdc_executor, couchbase):
    """
    Send simple JSON text into Couchbase destination from Dev Raw Data Source and assert Couchbase has received it.
    The pipeline looks like:
        dev_raw_data_source >> couchbase_destination

    Configures the destination's `database_version` from the Couchbase server's major
    version and uses the legacy `couchbase.admin` / `open_bucket` helper API.
    """
    bucket_name = get_random_string(string.ascii_letters, 10)
    document_key_field = 'mydocname'
    raw_dict = dict(f1='abc', f2='xyz', f3='lmn')
    raw_dict[document_key_field] = 'mydocid'
    raw_data = json.dumps(raw_dict)
    builder = sdc_builder.get_pipeline_builder()
    dev_raw_data_source = builder.add_stage('Dev Raw Data Source')
    dev_raw_data_source.set_attributes(data_format='JSON', raw_data=raw_data, stop_after_first_batch=True)
    couchbase_destination = builder.add_stage('Couchbase', type='destination')
    couchbase_destination.set_attributes(
        # e.g. server 5.x -> 'VERSION5'; derived from the environment's server version.
        database_version='VERSION{}'.format(couchbase.version.split('.')[0]),
        unique_document_key_field=document_key_field,
        bucket=bucket_name)
    dev_raw_data_source >> couchbase_destination
    pipeline = builder.build(title='Couchbase Destination pipeline'
                             ).configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)
    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.admin.bucket_create(name=bucket_name, bucket_type='couchbase', ram_quota=256)
        couchbase.wait_for_healthy_bucket(bucket_name)
        sdc_executor.start_pipeline(pipeline).wait_for_finished()
        bucket = couchbase.cluster.open_bucket(bucket_name)
        doc_value = bucket.get(raw_dict[document_key_field]).value
        assert doc_value == raw_dict
    finally:
        # NOTE(review): drop is not wrapped in try/except here, unlike sibling tests;
        # a teardown failure will mask the test's own failure — confirm intended.
        logger.info('Deleting %s Couchbase bucket ...', bucket_name)
        couchbase.admin.bucket_delete(bucket_name)
def test_lookup_query(sdc_builder, sdc_executor, couchbase, test_name, input, expected, multiple_value_behavior, missing_value_behavior):
    """Run an N1QL Couchbase Lookup and verify routing for the configured
    multiple-value and missing-value behaviors.

    Three documents are stored; the lookup's WHERE clause is taken verbatim from the
    record's /criteria field, so `input` controls how many documents match.

    Pipeline: Dev Raw Data Source >> Couchbase Lookup >> wiretap.
    """
    bucket_name = get_random_string(string.ascii_letters, 10).lower()
    docs = [{'id': 'id1', 'data': 'hello'},
            {'id': 'id2', 'data': 'hello'},
            {'id': 'id3', 'data': 'hello'}]
    raw_dict = dict(criteria=input)
    raw_data = json.dumps(raw_dict)
    # EL part concatenated separately so the f-string does not expand ${record:value(...)}.
    query = f"SELECT id FROM {bucket_name} WHERE " + '${record:value("/criteria")}'
    cluster = couchbase.cluster
    try:
        # populate the database
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(CreateBucketSettings(name=bucket_name,
                                                                    bucket_type='couchbase',
                                                                    ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)
        bucket = cluster.bucket(bucket_name)
        for doc in docs:
            bucket.upsert(doc['id'], doc)
        # Ad-hoc N1QL queries require a primary index on the bucket.
        cluster.query(f'CREATE PRIMARY INDEX ON `{bucket_name}`').execute()

        # build the pipeline
        builder = sdc_builder.get_pipeline_builder()
        origin = builder.add_stage('Dev Raw Data Source')
        origin.set_attributes(data_format='JSON', stop_after_first_batch=True, raw_data=raw_data)
        lookup = builder.add_stage('Couchbase Lookup')
        lookup.set_attributes(authentication_mode='USER', bucket=bucket_name,
                              lookup_type='N1QL', n1ql_query=query,
                              n1ql_mappings=[dict(property='id', sdcField='/output')],
                              multiple_value_behavior=multiple_value_behavior,
                              missing_value_behavior=missing_value_behavior)
        wiretap = builder.add_wiretap()
        origin >> lookup >> wiretap.destination
        pipeline = builder.build().configure_for_environment(couchbase)
        sdc_executor.add_pipeline(pipeline)
        sdc_executor.start_pipeline(pipeline).wait_for_finished()

        output_records = wiretap.output_records
        error_records = wiretap.error_records
        if missing_value_behavior == 'ERROR':
            # The input record should pass through to error records without an output field
            assert len(error_records) == 1
            assert 'output' not in error_records[0].field
        elif not expected:
            # The input record should pass through to output records without an output field
            assert len(output_records) == 1
            assert 'output' not in output_records[0].field
        else:
            assert len(output_records) == len(expected)
            # Check that the output records are as expected, allowing for reordering
            output_list = [record.field['output'] for record in output_records]
            assert Counter(output_list) == Counter(expected)
    finally:
        # Best-effort cleanup: never mask the test's own failure with a teardown error.
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_multiple_batches_query(sdc_builder, sdc_executor, couchbase, batch_size):
    """Run the N1QL lookup across several batches and verify every record was enriched.

    A Dev Data Generator emits a LONG_SEQUENCE; an Expression Evaluator derives
    /lookup = seq % 3 + 1, which the lookup uses to fetch the matching document's
    'data' value into /output.

    Pipeline: Dev Data Generator >> Expression Evaluator >> Couchbase Lookup >> wiretap.
    """
    bucket_name = get_random_string(string.ascii_letters, 10).lower()
    docs = [{"id": "1", "data": 10}, {"id": "2", "data": 20}, {"id": "3", "data": 30}]
    batches = 3
    # EL part concatenated separately so the f-string does not expand ${record:value(...)}.
    query = f'SELECT data FROM {bucket_name} WHERE ' + 'id="${record:value("/lookup")}"'
    cluster = couchbase.cluster
    try:
        # Populate the database. This is inside the try so the bucket is dropped even
        # if pipeline construction or execution fails (the original leaked it then).
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(CreateBucketSettings(name=bucket_name,
                                                                    bucket_type='couchbase',
                                                                    ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)
        bucket = cluster.bucket(bucket_name)
        for doc in docs:
            bucket.upsert(doc["id"], doc)
        # Ad-hoc N1QL queries require a primary index on the bucket.
        cluster.query(f'CREATE PRIMARY INDEX ON `{bucket_name}`').execute()

        # build the pipeline
        builder = sdc_builder.get_pipeline_builder()
        origin = builder.add_stage('Dev Data Generator')
        origin.fields_to_generate = [{"type": "LONG_SEQUENCE", "field": "seq"}]
        expression = builder.add_stage('Expression Evaluator')
        expression.field_expressions = [{'fieldToSet': '/lookup',
                                         'expression': '${record:value("/seq") % 3 + 1}'}]
        lookup = builder.add_stage('Couchbase Lookup')
        lookup.set_attributes(authentication_mode='USER', bucket=bucket_name,
                              lookup_type='N1QL', n1ql_query=query,
                              n1ql_mappings=[dict(property='data', sdcField='/output')],
                              missing_value_behavior='PASS')
        wiretap = builder.add_wiretap()
        origin >> expression >> lookup >> wiretap.destination
        pipeline = builder.build().configure_for_environment(couchbase)
        sdc_executor.add_pipeline(pipeline)

        # run the pipeline (the generator never stops on its own)
        sdc_executor.start_pipeline(pipeline).wait_for_pipeline_output_records_count(batches * batch_size)
        sdc_executor.stop_pipeline(pipeline)
        history = sdc_executor.get_pipeline_history(pipeline)
        record_count = history.latest.metrics.counter('pipeline.batchInputRecords.counter').count
        logger.info(f"Wrote {record_count} records")
        records = wiretap.output_records
        assert len(records) == record_count

        # Verify each record: sort by sequence number, then check the derived fields.
        records.sort(key=lambda record: record.field['seq'].value)
        for expected_number, record in enumerate(records):
            assert record.field['seq'] == expected_number
            assert record.field['lookup'] == expected_number % 3 + 1
            assert record.field['output'] == (expected_number % 3 + 1) * 10
    finally:
        # Best-effort cleanup: never mask the test's own failure with a teardown error.
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_data_types(sdc_builder, sdc_executor, couchbase, input, converter_type, expected):
    """Convert /value to a parametrized SDC type, write the record to Couchbase, and
    assert the stored document's 'value' matches the expected serialized form.

    Pipeline: Dev Raw Data Source >> Field Type Converter >> Couchbase destination.
    """
    bucket_name = get_random_string(string.ascii_letters, 10)
    document_key_field = 'mydocname'
    raw_dict = {"value": input, document_key_field: 'mydocid'}
    raw_data = json.dumps(raw_dict)
    cluster = couchbase.cluster
    # Build the pipeline
    builder = sdc_builder.get_pipeline_builder()
    source = builder.add_stage('Dev Raw Data Source').set_attributes(
        data_format='JSON', raw_data=raw_data, stop_after_first_batch=True)
    converter = builder.add_stage('Field Type Converter')
    converter.conversion_method = 'BY_FIELD'
    converter.field_type_converter_configs = [{
        'fields': ['/value'],
        'targetType': converter_type,
        'dataLocale': 'en,US',
        'dateFormat': 'YYYY_MM_DD_HH_MM_SS',
        'zonedDateTimeFormat': 'ISO_OFFSET_DATE_TIME',
        'scale': 2
    }]
    destination = builder.add_stage('Couchbase', type='destination')
    destination.set_attributes(authentication_mode='USER',
                               bucket=bucket_name,
                               document_key="${record:value('/" + document_key_field + "')}")
    source >> converter >> destination
    pipeline = builder.build().configure_for_environment(couchbase)
    # Fail fast on conversion/write errors instead of retrying the pipeline.
    pipeline.configuration["shouldRetry"] = False
    sdc_executor.add_pipeline(pipeline)
    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name, bucket_type='couchbase', ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)
        sdc_executor.start_pipeline(pipeline).wait_for_finished()
        bucket = cluster.bucket(bucket_name)
        doc_value = bucket.get(raw_dict[document_key_field]).value
        # Same number of fields as the input record, and the converted value matches.
        assert len(doc_value) == len(raw_dict)
        assert doc_value['value'] == expected
    finally:
        # Stop the pipeline if still running, then drop the bucket best-effort.
        if pipeline and sdc_executor.get_pipeline_status(
                pipeline).response.json().get('status') == 'RUNNING':
            sdc_executor.stop_pipeline(pipeline)
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_data_format_sdc_record(sdc_builder, sdc_executor, couchbase):
    """Write records in SDC_JSON (SDC Record) format and verify each stored document
    decodes back to the original input record.

    Pipeline: Dev Raw Data Source >> Couchbase destination.
    """
    bucket_name = get_random_string(string.ascii_letters, 10)
    document_key_field = 'field1'
    json_data = [{
        "field1": "abc",
        "field2": "def",
        "field3": "ghi"
    }, {
        "field1": "jkl",
        "field2": "mno",
        "field3": "pqr"
    }]
    # Concatenated JSON objects (no separator) — the JSON parser reads them as a stream.
    raw_data = ''.join(json.dumps(record) for record in json_data)
    cluster = couchbase.cluster
    # Build the pipeline
    builder = sdc_builder.get_pipeline_builder()
    source = builder.add_stage('Dev Raw Data Source').set_attributes(
        data_format='JSON', raw_data=raw_data, stop_after_first_batch=True)
    destination = builder.add_stage('Couchbase', type='destination')
    destination.set_attributes(authentication_mode='USER',
                               bucket=bucket_name,
                               document_key="${record:value('/" + document_key_field + "')}",
                               data_format='SDC_JSON')
    source >> destination
    pipeline = builder.build().configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)
    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name, bucket_type='couchbase', ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)
        sdc_executor.start_pipeline(pipeline).wait_for_finished()
        history = sdc_executor.get_pipeline_history(pipeline)
        num_records = history.latest.metrics.counter(
            'pipeline.batchOutputRecords.counter').count
        logger.info(f"Wrote {num_records} records")
        assert num_records == len(json_data)
        bucket = cluster.bucket(bucket_name)
        for i in range(len(json_data)):
            doc_value = bucket.get(json_data[i][document_key_field]).value
            # Decode the bytes object and disregard the first character (0xa1)
            contents = doc_value.decode('latin1')[1:]
            # Decode the SDC Record JSON into a dictionary containing its value
            dictionary = json.loads(contents)
            value = sdc_value_reader(dictionary['value'])
            assert value == json_data[i]
    finally:
        # Stop the pipeline if still running, then drop the bucket best-effort.
        if pipeline and sdc_executor.get_pipeline_status(
                pipeline).response.json().get('status') == 'RUNNING':
            sdc_executor.stop_pipeline(pipeline)
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_data_format_avro(sdc_builder, sdc_executor, couchbase):
    """Write one record as an Avro object-container file and verify it round-trips.

    Pipeline: Dev Raw Data Source >> Couchbase destination (data_format='AVRO' with an
    inline schema). The stored bytes are decoded with avro's DataFileReader.
    """
    bucket_name = get_random_string(string.ascii_letters, 10)
    document_key = 'id'
    DATA = {
        'name': 'boss',
        'age': 60,
        'emails': ['*****@*****.**', '*****@*****.**'],
        'boss': None
    }
    SCHEMA = {
        'namespace': 'example.avro',
        'type': 'record',
        'name': 'Employee',
        'fields': [{
            'name': 'name',
            'type': 'string'
        }, {
            'name': 'age',
            'type': 'int'
        }, {
            'name': 'emails',
            'type': {
                'type': 'array',
                'items': 'string'
            }
        }, {
            'name': 'boss',
            'type': ['Employee', 'null']
        }]
    }
    cluster = couchbase.cluster
    # Build the pipeline
    builder = sdc_builder.get_pipeline_builder()
    source = builder.add_stage('Dev Raw Data Source').set_attributes(
        data_format='JSON',
        raw_data=json.dumps(DATA),
        stop_after_first_batch=True)
    destination = builder.add_stage('Couchbase', type='destination')
    destination.set_attributes(authentication_mode='USER',
                               bucket=bucket_name,
                               document_key=document_key,
                               data_format='AVRO',
                               avro_schema=json.dumps(SCHEMA),
                               avro_schema_location='INLINE')
    source >> destination
    pipeline = builder.build().configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)
    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name, bucket_type='couchbase', ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)
        sdc_executor.start_pipeline(pipeline).wait_for_finished()
        bucket = cluster.bucket(bucket_name)
        doc_value = bucket.get(document_key).value
        # Decode the Avro container bytes returned by Couchbase. The reader is closed
        # in a finally so it is released even if decoding or an assertion fails
        # (the original only closed it after the asserts).
        reader = DataFileReader(BytesIO(doc_value), DatumReader())
        try:
            records = list(reader)
        finally:
            reader.close()
        assert len(
            records
        ) == 1, 'Number of records stored should equal number of records that entered the pipeline'
        assert records[0] == DATA
    finally:
        # Stop the pipeline if still running, then drop the bucket best-effort.
        if pipeline and sdc_executor.get_pipeline_status(
                pipeline).response.json().get('status') == 'RUNNING':
            sdc_executor.stop_pipeline(pipeline)
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")