# Collected StreamSets Data Collector (SDC) test examples for the Couchbase
# lookup and destination stages. The imports below are a best-effort
# reconstruction of what these snippets need; exact paths for Version and
# sdc_value_reader may differ by test-framework version, and module-level
# values such as PROTOBUF_FILE_PATH, the pytest fixtures, and the
# @pytest.mark.parametrize decorators are defined elsewhere in the original
# test modules.
import json
import logging
import string
from collections import Counter
from io import BytesIO

import pytest
from avro.datafile import DataFileReader
from avro.io import DatumReader
from couchbase.management.buckets import CreateBucketSettings
from streamsets.sdk.utils import Version
from streamsets.testframework.utils import get_random_string, sdc_value_reader

logger = logging.getLogger(__name__)


def test_object_names_bucket_query(sdc_builder, sdc_executor, couchbase,
                                   test_name, bucket_name):
    document_key_field = 'mydocname'
    key = 'mydocid'
    doc = {"data": "hello", document_key_field: key}
    raw_dict = dict(id=key)
    raw_data = json.dumps(raw_dict)
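    # the EL part is concatenated so Python's f-string formatting leaves
    # ${record:value("/id")} intact for SDC to evaluate against each record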
    query = f'SELECT * FROM `{bucket_name}` WHERE {document_key_field}=' + '"${record:value("/id")}"'
    cluster = couchbase.cluster

    try:
        # populate the database
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name,
                                 bucket_type='couchbase',
                                 ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)

        bucket = cluster.bucket(bucket_name)
        bucket.upsert(key, doc)
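        # N1QL queries need an index to run; create a primary index on the new bucket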
        cluster.query(f'CREATE PRIMARY INDEX ON `{bucket_name}`').execute()

        # build the pipeline
        builder = sdc_builder.get_pipeline_builder()

        origin = builder.add_stage('Dev Raw Data Source')
        origin.set_attributes(data_format='JSON',
                              stop_after_first_batch=True,
                              raw_data=raw_data)

        lookup = builder.add_stage('Couchbase Lookup')
        lookup.set_attributes(
            authentication_mode='USER',
            bucket=bucket_name,
            lookup_type='N1QL',
            n1ql_query=query,
            n1ql_mappings=[dict(property=bucket_name, sdcField='/output')],
            missing_value_behavior='ERROR')

        wiretap = builder.add_wiretap()

        origin >> lookup >> wiretap.destination

        pipeline = builder.build().configure_for_environment(couchbase)

        sdc_executor.add_pipeline(pipeline)
        sdc_executor.start_pipeline(pipeline).wait_for_finished()

        output_records = wiretap.output_records
        assert len(output_records) == 1, \
            'Number of returned records should equal the number of matching records stored'
        assert output_records[0].field['output'] == doc
    finally:
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_lookup_kv(sdc_builder, sdc_executor, couchbase, test_name, input,
                   expected_out, expected_error, missing_value_behavior):
    bucket_name = get_random_string(string.ascii_letters, 10).lower()
    doc = {'id': 'id1', 'data': 'hello'}
    raw_dict = dict(id=input)
    raw_data = json.dumps(raw_dict)
    cluster = couchbase.cluster

    try:
        # populate the database
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name,
                                 bucket_type='couchbase',
                                 ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)

        bucket = cluster.bucket(bucket_name)
        bucket.upsert(doc['id'], doc)

        # build the pipeline
        builder = sdc_builder.get_pipeline_builder()

        origin = builder.add_stage('Dev Raw Data Source')
        origin.set_attributes(data_format='JSON',
                              stop_after_first_batch=True,
                              raw_data=raw_data)

        lookup = builder.add_stage('Couchbase Lookup')
        lookup.set_attributes(authentication_mode='USER',
                              bucket=bucket_name,
                              lookup_type='KV',
                              document_key='${record:value("/id")}',
                              sdc_field='/output',
                              missing_value_behavior=missing_value_behavior)

        wiretap = builder.add_wiretap()

        origin >> lookup >> wiretap.destination

        pipeline = builder.build().configure_for_environment(couchbase)

        sdc_executor.add_pipeline(pipeline)
        sdc_executor.start_pipeline(pipeline).wait_for_finished()

        output_records = wiretap.output_records
        error_records = wiretap.error_records

        assert len(output_records) == len(expected_out)
        assert len(error_records) == len(expected_error)
        if expected_out:
            assert output_records[0].field == expected_out[0]
        if expected_error:
            assert error_records[0].field == expected_error[0]
    finally:
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_data_format_binary(sdc_builder, sdc_executor, couchbase):
    bucket_name = get_random_string(string.ascii_letters, 10)
    document_key = 'id'
    batch_size = 1
    cluster = couchbase.cluster

    # Build the pipeline
    builder = sdc_builder.get_pipeline_builder()

    source = builder.add_stage('Dev Data Generator').set_attributes(
        batch_size=batch_size,
        fields_to_generate=[{
            "type": "BYTE_ARRAY",
            "field": "data"
        }])

    destination = builder.add_stage('Couchbase', type='destination')
    destination.set_attributes(authentication_mode='USER',
                               bucket=bucket_name,
                               document_key=document_key,
                               data_format='BINARY',
                               binary_field_path="/data")

    wiretap = builder.add_wiretap()

    source >> destination
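    # '>>' connects the data lane; '>=' connects the stage's event lane to the wiretap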
    source >= wiretap.destination

    pipeline = builder.build().configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)

    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name,
                                 bucket_type='couchbase',
                                 ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)

        sdc_executor.start_pipeline(pipeline).wait_for_pipeline_output_records_count(batch_size)
        sdc_executor.stop_pipeline(pipeline)

        history = sdc_executor.get_pipeline_history(pipeline)
        num_records = history.latest.metrics.counter(
            'pipeline.batchInputRecords.counter').count
        logger.info(f"Wrote {num_records} records")
        assert num_records == len(wiretap.output_records)

        bucket = cluster.bucket(bucket_name)
        assert bucket.get(document_key).value == wiretap.output_records[0].field['data']

    finally:
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_data_types_query(sdc_builder, sdc_executor, couchbase, input, test_name, expected_type, expected_value):
    if input is None:
        pytest.skip('Lookup Processor queries deal with null values as errors rather than treating them as values')
    bucket_name = get_random_string(string.ascii_letters, 10).lower()
    key = 'mydocid'
    doc = {'id': key, 'data': input}
    raw_dict = dict(id=key)
    raw_data = json.dumps(raw_dict)
    query = f'SELECT data FROM `{bucket_name}` WHERE ' + 'id="${record:value("/id")}"'
    cluster = couchbase.cluster

    try:
        # populate the database
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(CreateBucketSettings(name=bucket_name,
                                                                    bucket_type='couchbase',
                                                                    ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)

        bucket = cluster.bucket(bucket_name)
        bucket.upsert(key, doc)
        cluster.query(f'CREATE PRIMARY INDEX ON `{bucket_name}`').execute()

        # build the pipeline
        builder = sdc_builder.get_pipeline_builder()

        origin = builder.add_stage('Dev Raw Data Source')
        origin.set_attributes(data_format='JSON',
                              stop_after_first_batch=True,
                              raw_data=raw_data)

        lookup = builder.add_stage('Couchbase Lookup')
        lookup.set_attributes(authentication_mode='USER', bucket=bucket_name,
                              lookup_type='N1QL', n1ql_query=query,
                              n1ql_mappings=[dict(property='data', sdcField='/output')],
                              missing_value_behavior='ERROR')

        wiretap = builder.add_wiretap()

        origin >> lookup >> wiretap.destination

        pipeline = builder.build().configure_for_environment(couchbase)

        sdc_executor.add_pipeline(pipeline)
        sdc_executor.start_pipeline(pipeline).wait_for_finished()

        output_records = wiretap.output_records

        assert len(output_records) == 1
        assert output_records[0].field['output'].type == expected_type
        assert output_records[0].field['output'] == expected_value
    finally:
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_couchbase_destination(sdc_builder, sdc_executor, couchbase):
    """
    Send simple JSON text into Couchbase destination from Dev Raw Data Source and assert Couchbase has received it.

    The pipeline looks like:
        dev_raw_data_source >> couchbase_destination
    """
    couchbase_host = f'{couchbase.hostname}:{couchbase.port}'
    bucket_name = get_random_string(string.ascii_letters, 10)
    document_key_field = 'mydocname'
    raw_dict = dict(f1='abc', f2='xyz', f3='lmn')
    raw_dict[document_key_field] = 'mydocid'
    raw_data = json.dumps(raw_dict)

    builder = sdc_builder.get_pipeline_builder()
    dev_raw_data_source = builder.add_stage('Dev Raw Data Source')
    dev_raw_data_source.set_attributes(data_format='JSON',
                                       raw_data=raw_data,
                                       stop_after_first_batch=True)
    couchbase_destination = builder.add_stage('Couchbase', type='destination')
    if Version(sdc_builder.version) < Version('3.9.0'):
        couchbase_destination.set_attributes(
            database_version='VERSION5',
            unique_document_key_field=document_key_field,
            bucket=bucket_name,
            couchbase_user_name=couchbase.username,
            couchbase_user_password=couchbase.password,
            url=couchbase_host)
    else:
        couchbase_destination.set_attributes(authentication_mode='USER',
                                             document_key="${record:value('/" +
                                             document_key_field + "')}",
                                             bucket=bucket_name,
                                             user_name=couchbase.username,
                                             password=couchbase.password,
                                             node_list=couchbase_host)

    dev_raw_data_source >> couchbase_destination
    pipeline = builder.build(title='Couchbase Destination pipeline'
                             ).configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)

    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.admin.bucket_create(name=bucket_name,
                                      bucket_type='couchbase',
                                      ram_quota=256)
        couchbase.wait_for_healthy_bucket(bucket_name)

        sdc_executor.start_pipeline(pipeline).wait_for_finished()

        bucket = couchbase.cluster.open_bucket(bucket_name)
        doc_value = bucket.get(raw_dict[document_key_field]).value
        assert doc_value == raw_dict
    finally:
        logger.info('Deleting %s Couchbase bucket ...', bucket_name)
        couchbase.admin.bucket_delete(bucket_name)
def test_data_format_protobuf(sdc_builder, sdc_executor, couchbase):
    bucket_name = get_random_string(string.ascii_letters, 10)
    document_key = 'id'
    raw_data = '{"first_name": "Martin","last_name": "Balzamo"}'
    expected = '\x11\x06Martin\x12\x07Balzamo'
    cluster = couchbase.cluster

    # Build the pipeline
    builder = sdc_builder.get_pipeline_builder()

    source = builder.add_stage('Dev Raw Data Source').set_attributes(
        data_format='JSON', raw_data=raw_data, stop_after_first_batch=True)

    destination = builder.add_stage('Couchbase', type='destination')
    destination.set_attributes(authentication_mode='USER',
                               bucket=bucket_name,
                               document_key=document_key,
                               data_format='PROTOBUF',
                               message_type='Contact',
                               protobuf_descriptor_file=PROTOBUF_FILE_PATH)

    source >> destination

    pipeline = builder.build().configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)

    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name,
                                 bucket_type='couchbase',
                                 ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)

        sdc_executor.start_pipeline(pipeline).wait_for_finished()

        history = sdc_executor.get_pipeline_history(pipeline)
        num_records = history.latest.metrics.counter(
            'pipeline.batchOutputRecords.counter').count
        logger.info(f"Wrote {num_records} records")
        assert num_records == 1, 'Number of records stored should equal the number of records that entered the pipeline'

        bucket = cluster.bucket(bucket_name)
        doc_value = bucket.get(document_key).value
        # Decode the bytes object returned by Couchbase and remove any record separators (newline characters)
        contents = doc_value.decode('ascii').replace('\n', '')
        assert contents == expected
    finally:
        if pipeline and sdc_executor.get_pipeline_status(
                pipeline).response.json().get('status') == 'RUNNING':
            sdc_executor.stop_pipeline(pipeline)
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_multiple_batches(sdc_builder, sdc_executor, couchbase, batch_size):
    bucket_name = get_random_string(string.ascii_letters, 10)
    batches = 3

    # Build the pipeline
    builder = sdc_builder.get_pipeline_builder()

    source = builder.add_stage('Dev Data Generator')
    source.batch_size = batch_size
    source.fields_to_generate = [{"type": "LONG_SEQUENCE", "field": "seq"}]
    cluster = couchbase.cluster

    destination = builder.add_stage('Couchbase', type='destination')
    destination.set_attributes(authentication_mode='USER',
                               bucket=bucket_name,
                               document_key='${record:value("/seq")}')

    wiretap = builder.add_wiretap()

    source >> destination
    source >= wiretap.destination

    pipeline = builder.build().configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)

    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name,
                                 bucket_type='couchbase',
                                 ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)

        sdc_executor.start_pipeline(pipeline).wait_for_pipeline_output_records_count(batches * batch_size)
        sdc_executor.stop_pipeline(pipeline)

        history = sdc_executor.get_pipeline_history(pipeline)
        num_records = history.latest.metrics.counter(
            'pipeline.batchInputRecords.counter').count
        logger.info(f"Wrote {num_records} records")
        assert num_records == len(wiretap.output_records)

        bucket = cluster.bucket(bucket_name)
        for i in range(num_records):
            assert bucket.get(str(i)).value == wiretap.output_records[i].field

    finally:
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_object_names_bucket(sdc_builder, sdc_executor, couchbase, test_name,
                             bucket_generator):
    """Test using different kinds of bucket names, asserting whether Couchbase receives the data"""
    bucket_name = bucket_generator()
    document_key_field = 'mydocname'
    raw_dict = dict(f1='abc', f2='xyz', f3='lmn')
    raw_dict[document_key_field] = 'mydocid'
    raw_data = json.dumps(raw_dict)
    cluster = couchbase.cluster

    # Build the pipeline
    builder = sdc_builder.get_pipeline_builder()

    source = builder.add_stage('Dev Raw Data Source').set_attributes(
        data_format='JSON', raw_data=raw_data, stop_after_first_batch=True)

    destination = builder.add_stage('Couchbase', type='destination')
    destination.set_attributes(authentication_mode='USER',
                               bucket=bucket_name,
                               document_key="${record:value('/" +
                               document_key_field + "')}")

    source >> destination

    pipeline = builder.build().configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)

    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name,
                                 bucket_type='couchbase',
                                 ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name.replace('%', '%25'))

        sdc_executor.start_pipeline(pipeline).wait_for_finished()

        # a percent sign in the bucket name is interpreted as an escape
        # character in the HTTP call made when opening the bucket, so it must
        # be URL-encoded as %25
        bucket = cluster.bucket(bucket_name.replace('%', '%25'))
        doc_value = bucket.get(raw_dict[document_key_field]).value
        assert doc_value == raw_dict
    finally:
        if pipeline and sdc_executor.get_pipeline_status(
                pipeline).response.json().get('status') == 'RUNNING':
            sdc_executor.stop_pipeline(pipeline)
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_data_format_json(sdc_builder, sdc_executor, couchbase):
    bucket_name = get_random_string(string.ascii_letters, 10)
    document_key_field = 'mydocname'
    raw_dict = dict(f1='abc', f2='xyz', f3='lmn')
    raw_dict[document_key_field] = 'mydocid'
    raw_data = json.dumps(raw_dict)
    cluster = couchbase.cluster

    # Build the pipeline
    builder = sdc_builder.get_pipeline_builder()

    source = builder.add_stage('Dev Raw Data Source').set_attributes(
        data_format='JSON', raw_data=raw_data, stop_after_first_batch=True)

    destination = builder.add_stage('Couchbase', type='destination')
    destination.set_attributes(authentication_mode='USER',
                               bucket=bucket_name,
                               document_key="${record:value('/" +
                               document_key_field + "')}",
                               data_format='JSON')

    source >> destination

    pipeline = builder.build().configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)

    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name,
                                 bucket_type='couchbase',
                                 ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)

        sdc_executor.start_pipeline(pipeline).wait_for_finished()

        bucket = cluster.bucket(bucket_name)
        doc_value = bucket.get(raw_dict[document_key_field]).value
        assert doc_value == raw_dict
    finally:
        if pipeline and sdc_executor.get_pipeline_status(
                pipeline).response.json().get('status') == 'RUNNING':
            sdc_executor.stop_pipeline(pipeline)
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_couchbase_destination(sdc_builder, sdc_executor, couchbase):
    """
    Send simple JSON text into Couchbase destination from Dev Raw Data Source and assert Couchbase has received it.

    The pipeline looks like:
        dev_raw_data_source >> couchbase_destination
    """
    bucket_name = get_random_string(string.ascii_letters, 10)
    document_key_field = 'mydocname'
    raw_dict = dict(f1='abc', f2='xyz', f3='lmn')
    raw_dict[document_key_field] = 'mydocid'
    raw_data = json.dumps(raw_dict)
    cluster = couchbase.cluster

    builder = sdc_builder.get_pipeline_builder()
    dev_raw_data_source = builder.add_stage('Dev Raw Data Source')
    dev_raw_data_source.set_attributes(data_format='JSON',
                                       raw_data=raw_data,
                                       stop_after_first_batch=True)
    couchbase_destination = builder.add_stage('Couchbase', type='destination')
    couchbase_destination.set_attributes(authentication_mode='USER',
                                         document_key="${record:value('/" +
                                         document_key_field + "')}",
                                         bucket=bucket_name)

    dev_raw_data_source >> couchbase_destination
    pipeline = builder.build(title='Couchbase Destination pipeline'
                             ).configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)

    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name,
                                 bucket_type='couchbase',
                                 ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)

        sdc_executor.start_pipeline(pipeline).wait_for_finished()

        bucket = cluster.bucket(bucket_name)
        doc_value = bucket.get(raw_dict[document_key_field]).value
        assert doc_value == raw_dict
    finally:
        logger.info('Deleting %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.drop_bucket(bucket_name)
def test_couchbase_destination(sdc_builder, sdc_executor, couchbase):
    """
    Send simple JSON text into Couchbase destination from Dev Raw Data Source and assert Couchbase has received it.

    The pipeline looks like:
        dev_raw_data_source >> couchbase_destination
    """
    bucket_name = get_random_string(string.ascii_letters, 10)
    document_key_field = 'mydocname'
    raw_dict = dict(f1='abc', f2='xyz', f3='lmn')
    raw_dict[document_key_field] = 'mydocid'
    raw_data = json.dumps(raw_dict)

    builder = sdc_builder.get_pipeline_builder()
    dev_raw_data_source = builder.add_stage('Dev Raw Data Source')
    dev_raw_data_source.set_attributes(data_format='JSON',
                                       raw_data=raw_data,
                                       stop_after_first_batch=True)
    couchbase_destination = builder.add_stage('Couchbase', type='destination')
    couchbase_destination.set_attributes(
        database_version='VERSION{}'.format(couchbase.version.split('.')[0]),
        unique_document_key_field=document_key_field,
        bucket=bucket_name)

    dev_raw_data_source >> couchbase_destination
    pipeline = builder.build(title='Couchbase Destination pipeline'
                             ).configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)

    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.admin.bucket_create(name=bucket_name,
                                      bucket_type='couchbase',
                                      ram_quota=256)
        couchbase.wait_for_healthy_bucket(bucket_name)

        sdc_executor.start_pipeline(pipeline).wait_for_finished()

        bucket = couchbase.cluster.open_bucket(bucket_name)
        doc_value = bucket.get(raw_dict[document_key_field]).value
        assert doc_value == raw_dict
    finally:
        logger.info('Deleting %s Couchbase bucket ...', bucket_name)
        couchbase.admin.bucket_delete(bucket_name)
def test_lookup_query(sdc_builder, sdc_executor, couchbase,
                      test_name, input, expected, multiple_value_behavior, missing_value_behavior):
    bucket_name = get_random_string(string.ascii_letters, 10).lower()
    docs = [{'id': 'id1', 'data': 'hello'},
            {'id': 'id2', 'data': 'hello'},
            {'id': 'id3', 'data': 'hello'}]
    raw_dict = dict(criteria=input)
    raw_data = json.dumps(raw_dict)
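    # the record's /criteria field supplies the entire WHERE predicate, which
    # SDC substitutes per record when evaluating the expression below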
    query = f"SELECT id FROM {bucket_name} WHERE " + '${record:value("/criteria")}'
    cluster = couchbase.cluster

    try:
        # populate the database
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(CreateBucketSettings(name=bucket_name,
                                                                    bucket_type='couchbase',
                                                                    ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)

        bucket = cluster.bucket(bucket_name)
        for doc in docs:
            bucket.upsert(doc['id'], doc)
        cluster.query(f'CREATE PRIMARY INDEX ON `{bucket_name}`').execute()

        # build the pipeline
        builder = sdc_builder.get_pipeline_builder()

        origin = builder.add_stage('Dev Raw Data Source')
        origin.set_attributes(data_format='JSON',
                              stop_after_first_batch=True,
                              raw_data=raw_data)

        lookup = builder.add_stage('Couchbase Lookup')
        lookup.set_attributes(authentication_mode='USER', bucket=bucket_name,
                              lookup_type='N1QL', n1ql_query=query,
                              n1ql_mappings=[dict(property='id', sdcField='/output')],
                              multiple_value_behavior=multiple_value_behavior,
                              missing_value_behavior=missing_value_behavior)

        wiretap = builder.add_wiretap()

        origin >> lookup >> wiretap.destination

        pipeline = builder.build().configure_for_environment(couchbase)

        sdc_executor.add_pipeline(pipeline)
        sdc_executor.start_pipeline(pipeline).wait_for_finished()

        output_records = wiretap.output_records
        error_records = wiretap.error_records

        logger.debug('Output records: %s', output_records)

        if missing_value_behavior == 'ERROR':
            # The input record should pass through to error records without an output field
            assert len(error_records) == 1
            assert 'output' not in error_records[0].field
        elif not expected:
            # The input record should pass through to output records without an output field
            assert len(output_records) == 1
            assert 'output' not in output_records[0].field
        else:
            assert len(output_records) == len(expected)
            # Check that the output records are as expected, allowing for reordering
            output_list = [record.field['output'] for record in output_records]
            assert Counter(output_list) == Counter(expected)
    finally:
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_multiple_batches_query(sdc_builder, sdc_executor, couchbase, batch_size):
    bucket_name = get_random_string(string.ascii_letters, 10).lower()
    docs = [{"id": "1", "data": 10},
            {"id": "2", "data": 20},
            {"id": "3", "data": 30}]
    batches = 3
    query = f'SELECT data FROM `{bucket_name}` WHERE ' + 'id="${record:value("/lookup")}"'
    cluster = couchbase.cluster

    # populate the database
    logger.info('Creating %s Couchbase bucket ...', bucket_name)
    couchbase.bucket_manager.create_bucket(CreateBucketSettings(name=bucket_name,
                                                                bucket_type='couchbase',
                                                                ram_quota_mb=256))
    couchbase.wait_for_healthy_bucket(bucket_name)

    bucket = cluster.bucket(bucket_name)
    for doc in docs:
        bucket.upsert(doc["id"], doc)
    cluster.query(f'CREATE PRIMARY INDEX ON `{bucket_name}`').execute()

    # build the pipeline
    builder = sdc_builder.get_pipeline_builder()

    origin = builder.add_stage('Dev Data Generator')
    origin.fields_to_generate = [{
        "type": "LONG_SEQUENCE",
        "field": "seq"
    }]

    expression = builder.add_stage('Expression Evaluator')
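    # map the generated sequence onto the three seeded document ids (1..3) so
    # every record's /lookup value matches a stored document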
    expression.field_expressions = [{
        'fieldToSet': '/lookup',
        'expression': '${record:value("/seq") % 3 + 1}'
    }]

    lookup = builder.add_stage('Couchbase Lookup')
    lookup.set_attributes(authentication_mode='USER', bucket=bucket_name,
                          lookup_type='N1QL', n1ql_query=query,
                          n1ql_mappings=[dict(property='data', sdcField='/output')],
                          missing_value_behavior='PASS')

    wiretap = builder.add_wiretap()

    origin >> expression >> lookup >> wiretap.destination

    pipeline = builder.build().configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)

    try:
        # run the pipeline
        sdc_executor.start_pipeline(pipeline).wait_for_pipeline_output_records_count(batches * batch_size)
        sdc_executor.stop_pipeline(pipeline)

        history = sdc_executor.get_pipeline_history(pipeline)
        record_count = history.latest.metrics.counter('pipeline.batchInputRecords.counter').count
        logger.info(f"Wrote {record_count} records")

        records = wiretap.output_records
        assert len(records) == record_count

        # Verify each record
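        # the seeded docs hold data = 10 * id, so after sorting by /seq each
        # record's output must equal (seq % 3 + 1) * 10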
        records.sort(key=lambda entry: entry.field['seq'].value)

        for expected_number, record in enumerate(records):
            assert record.field['seq'] == expected_number
            assert record.field['lookup'] == expected_number % 3 + 1
            assert record.field['output'] == (expected_number % 3 + 1) * 10
    finally:
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_data_types(sdc_builder, sdc_executor, couchbase, input,
                    converter_type, expected):
    bucket_name = get_random_string(string.ascii_letters, 10)

    document_key_field = 'mydocname'
    raw_dict = {"value": input, document_key_field: 'mydocid'}
    raw_data = json.dumps(raw_dict)
    cluster = couchbase.cluster

    # Build the pipeline
    builder = sdc_builder.get_pipeline_builder()

    source = builder.add_stage('Dev Raw Data Source').set_attributes(
        data_format='JSON', raw_data=raw_data, stop_after_first_batch=True)

    converter = builder.add_stage('Field Type Converter')
    converter.conversion_method = 'BY_FIELD'
    converter.field_type_converter_configs = [{
        'fields': ['/value'],
        'targetType': converter_type,
        'dataLocale': 'en,US',
        'dateFormat': 'YYYY_MM_DD_HH_MM_SS',
        'zonedDateTimeFormat': 'ISO_OFFSET_DATE_TIME',
        'scale': 2
    }]

    destination = builder.add_stage('Couchbase', type='destination')
    destination.set_attributes(authentication_mode='USER',
                               bucket=bucket_name,
                               document_key="${record:value('/" +
                               document_key_field + "')}")

    source >> converter >> destination

    pipeline = builder.build().configure_for_environment(couchbase)
    pipeline.configuration["shouldRetry"] = False
    sdc_executor.add_pipeline(pipeline)

    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name,
                                 bucket_type='couchbase',
                                 ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)

        sdc_executor.start_pipeline(pipeline).wait_for_finished()

        bucket = cluster.bucket(bucket_name)
        doc_value = bucket.get(raw_dict[document_key_field]).value

        assert len(doc_value) == len(raw_dict)
        assert doc_value['value'] == expected
    finally:
        if pipeline and sdc_executor.get_pipeline_status(
                pipeline).response.json().get('status') == 'RUNNING':
            sdc_executor.stop_pipeline(pipeline)
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_data_format_sdc_record(sdc_builder, sdc_executor, couchbase):
    bucket_name = get_random_string(string.ascii_letters, 10)
    document_key_field = 'field1'
    json_data = [{
        "field1": "abc",
        "field2": "def",
        "field3": "ghi"
    }, {
        "field1": "jkl",
        "field2": "mno",
        "field3": "pqr"
    }]
    raw_data = ''.join(json.dumps(record) for record in json_data)
    cluster = couchbase.cluster

    # Build the pipeline
    builder = sdc_builder.get_pipeline_builder()

    source = builder.add_stage('Dev Raw Data Source').set_attributes(
        data_format='JSON', raw_data=raw_data, stop_after_first_batch=True)

    destination = builder.add_stage('Couchbase', type='destination')
    destination.set_attributes(authentication_mode='USER',
                               bucket=bucket_name,
                               document_key="${record:value('/" +
                               document_key_field + "')}",
                               data_format='SDC_JSON')

    source >> destination

    pipeline = builder.build().configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)

    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name,
                                 bucket_type='couchbase',
                                 ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)

        sdc_executor.start_pipeline(pipeline).wait_for_finished()

        history = sdc_executor.get_pipeline_history(pipeline)
        num_records = history.latest.metrics.counter(
            'pipeline.batchOutputRecords.counter').count
        logger.info(f"Wrote {num_records} records")
        assert num_records == len(json_data)

        bucket = cluster.bucket(bucket_name)

        for i in range(len(json_data)):
            doc_value = bucket.get(json_data[i][document_key_field]).value
            # Decode the bytes object and disregard the first character (0xa1)
            contents = doc_value.decode('latin1')[1:]
            # Decode the SDC Record JSON into a dictionary containing its value
            dictionary = json.loads(contents)
            value = sdc_value_reader(dictionary['value'])
            assert value == json_data[i]
    finally:
        if pipeline and sdc_executor.get_pipeline_status(
                pipeline).response.json().get('status') == 'RUNNING':
            sdc_executor.stop_pipeline(pipeline)
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")
def test_data_format_avro(sdc_builder, sdc_executor, couchbase):
    bucket_name = get_random_string(string.ascii_letters, 10)
    document_key = 'id'

    DATA = {
        'name': 'boss',
        'age': 60,
        'emails': ['*****@*****.**', '*****@*****.**'],
        'boss': None
    }
    SCHEMA = {
        'namespace':
        'example.avro',
        'type':
        'record',
        'name':
        'Employee',
        'fields': [{
            'name': 'name',
            'type': 'string'
        }, {
            'name': 'age',
            'type': 'int'
        }, {
            'name': 'emails',
            'type': {
                'type': 'array',
                'items': 'string'
            }
        }, {
            'name': 'boss',
            'type': ['Employee', 'null']
        }]
    }
    cluster = couchbase.cluster

    # Build the pipeline
    builder = sdc_builder.get_pipeline_builder()

    source = builder.add_stage('Dev Raw Data Source').set_attributes(
        data_format='JSON',
        raw_data=json.dumps(DATA),
        stop_after_first_batch=True)

    destination = builder.add_stage('Couchbase', type='destination')
    destination.set_attributes(authentication_mode='USER',
                               bucket=bucket_name,
                               document_key=document_key,
                               data_format='AVRO',
                               avro_schema=json.dumps(SCHEMA),
                               avro_schema_location='INLINE')

    source >> destination

    pipeline = builder.build().configure_for_environment(couchbase)
    sdc_executor.add_pipeline(pipeline)

    try:
        logger.info('Creating %s Couchbase bucket ...', bucket_name)
        couchbase.bucket_manager.create_bucket(
            CreateBucketSettings(name=bucket_name,
                                 bucket_type='couchbase',
                                 ram_quota_mb=256))
        couchbase.wait_for_healthy_bucket(bucket_name)

        sdc_executor.start_pipeline(pipeline).wait_for_finished()

        bucket = cluster.bucket(bucket_name)
        doc_value = bucket.get(document_key).value

        # decode the bytes object returned by Couchbase
        file = BytesIO(doc_value)
        reader = DataFileReader(file, DatumReader())
        records = [record for record in reader]
        assert len(records) == 1, \
            'Number of records stored should equal number of records that entered the pipeline'
        assert records[0] == DATA
        reader.close()
    finally:
        if pipeline and sdc_executor.get_pipeline_status(
                pipeline).response.json().get('status') == 'RUNNING':
            sdc_executor.stop_pipeline(pipeline)
        try:
            logger.info('Deleting %s Couchbase bucket ...', bucket_name)
            couchbase.bucket_manager.drop_bucket(bucket_name)
        except Exception as e:
            logger.error(f"Can't delete bucket: {e}")