Example #1
def given_we_load_data_from_the_source_file_to_the_source_topic(step):
    # step implementation: extract the scenario's source file and load its
    # records into the raw topic, keeping an in-memory copy for later assertions
    topic = world.pipeline_config.raw_topic
    test_context = world.test_environment.load_context(
        EXTRACT_TRANSFORM_CONSUME_SCENARIO)

    # asynchronous Kafka writer shared by both loaders below
    knodes = world.pipeline_config.cluster.node_array
    kwriter = telegraf.KafkaIngestRecordWriter(knodes)

    # KafkaLoader publishes each extracted record to the raw topic via kwriter;
    # WorldLoader mirrors the same interface but keeps the records in memory so
    # the test context can inspect exactly what was loaded
    kloader = telegraf.KafkaLoader(topic,
                                   kwriter,
                                   record_type='direct_sales_record',
                                   stream_id='test_stream_id',
                                   asset_id='test_asset_id')
    wloader = WorldLoader(topic,
                          kwriter,
                          record_type='direct_sales_record',
                          stream_id='test_stream_id',
                          asset_id='test_asset_id')
    processor = dmap.WhitespaceCleanupProcessor()
    extractor = dmap.CSVFileDataExtractor(processor,
                                          delimiter='|',
                                          quotechar='"')
    source_filename = world.pipeline_config.get_file_reference(
        test_context.test_file_alias)
    # extract the source file twice: once into the in-memory WorldLoader for
    # later inspection, once into the KafkaLoader that publishes to the topic
    extractor.extract(source_filename, load_function=wloader.load)
    extractor.extract(source_filename, load_function=kloader.load)
    kwriter.sync(0.1)  # flush pending asynchronous Kafka writes
    for rec in wloader.record_list:
        test_context.loaded_raw_record_list.append(rec)
    # capture any failed write promises for later assertions
    test_context.promise_queue_errors = kwriter.process_write_promise_queue()
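
WorldLoader is used here and in Example #2 but is not defined in these snippets. A minimal sketch of what it might look like, inferred only from how it is called (same constructor arguments as telegraf.KafkaLoader, a load() method, and a record_list attribute holding everything loaded); the actual project class may differ:

class WorldLoader(object):
    """Hypothetical in-memory stand-in mirroring telegraf.KafkaLoader's interface."""

    def __init__(self, topic, kafka_ingest_record_writer, **kwargs):
        self.topic = topic
        self.kwriter = kafka_ingest_record_writer
        self.header_fields = kwargs  # record_type, stream_id, asset_id
        self.record_list = []

    def load(self, record):
        # keep a local copy so the test can inspect what was (or would be) sent
        self.record_list.append(record)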
Example #2
def and_load_the_core_records(step):
    # step implementation: take the records consumed in the previous step and
    # load their bodies into the staging topic
    test_context = world.test_environment.load_context(
        EXTRACT_TRANSFORM_CONSUME_SCENARIO)
    knodes = world.pipeline_config.cluster.node_array
    kwriter = telegraf.KafkaIngestRecordWriter(knodes)
    topic = world.pipeline_config.staging_topic
    kloader = telegraf.KafkaLoader(topic,
                                   kwriter,
                                   record_type='direct_sales_record',
                                   stream_id='test_stream_id',
                                   asset_id='test_asset_id')
    wloader = WorldLoader(topic,
                          kwriter,
                          record_type='direct_sales_record',
                          stream_id='test_stream_id',
                          asset_id='test_asset_id')
    # print "\n\n #### record header = %s, record body = %s" % (
    # test_context.consumed_raw_into_sst_record_list[0]['header'],
    # test_context.consumed_raw_into_sst_record_list[0]['body'])

    # each consumed record is a dict with 'header' and 'body'; only the body
    # is re-loaded into the staging topic
    for record in test_context.consumed_raw_into_core_record_list:
        wloader.load(record['body'])
        kloader.load(record['body'])

    kwriter.sync(0.1)
    for rec in wloader.record_list:
        test_context.loaded_sst_record_list.append(rec)
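
The loop above (and the commented-out debug print) implies that each consumed record is a dict with a 'header' and a 'body'. For illustration only, a record of that shape might look like the following; the header keys are assumed to mirror the loader keyword arguments, and the body fields are made up:

sample_consumed_record = {
    'header': {
        'record_type': 'direct_sales_record',
        'stream_id': 'test_stream_id',
        'asset_id': 'test_asset_id'
    },
    'body': {
        # illustrative field names only; the real schema comes from the source file
        'order_id': '1001',
        'sale_amount': '42.50'
    }
}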
Example #3
    def test_loader_can_send_records(self):
        kloader = tg.KafkaLoader(self.target_topic,
                                 self.kwriter,
                                 pipeline_id='mx_test_pipeline',
                                 record_type='mx_test_record')

        local_filename = 'data/sample_objectstore.csv'
        # processor = dmap.WhitespaceCleanupProcessor()
        # passing None as the processor loads fields as-is, with no cleanup step
        extractor = dmap.CSVFileDataExtractor(None,
                                              delimiter='|',
                                              quotechar='"')

        extractor.extract(local_filename, load_function=kloader.load)
        self.kwriter.sync(0.1)
        # an empty error list from process_write_promise_queue() means every
        # queued write promise was fulfilled
        self.assertTrue(self.kwriter.promise_queue_size > 0)
        self.assertEqual(len(self.kwriter.process_write_promise_queue()), 0)
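
This unit test relies on a setUp that prepares self.kwriter and self.target_topic. A minimal sketch, assuming the same constructor pattern used in the BDD steps above; the import path, node list format, and topic name are all assumptions, not the project's actual fixture:

import unittest
import telegraf as tg  # import path assumed from the `tg` alias used above


class KafkaLoaderTest(unittest.TestCase):

    def setUp(self):
        # The node array format is project-specific; the BDD steps take it from
        # world.pipeline_config.cluster.node_array. A single local broker entry
        # is assumed here purely for illustration.
        knodes = [('localhost', 9092)]
        self.kwriter = tg.KafkaIngestRecordWriter(knodes)
        self.target_topic = 'mx_test_topic'  # assumed test topic name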
Example #4
def when_we_load_n_records_from_the_source_file(step, max_lin):
    test_context = world.test_environment.load_context(DATA_COMMIT_SCENARIO)

    max_lines = int(max_lin)
    topic = world.pipeline_config.get_user_topic('scratch_topic')

    knodes = world.pipeline_config.cluster.node_array
    kwriter = telegraf.KafkaIngestRecordWriter(knodes)

    kloader = telegraf.KafkaLoader(topic,
                                   kwriter,
                                   record_type='direct_sales_record',
                                   stream_id='test_stream_id',
                                   asset_id='test_asset_id')
    processor = dmap.WhitespaceCleanupProcessor()
    extractor = dmap.CSVFileDataExtractor(processor,
                                          delimiter='|',
                                          quotechar='"')

    source_filename = world.pipeline_config.get_file_reference(
        test_context.test_file_alias)
    # max_lines caps how many records are extracted and loaded into the topic
    extractor.extract(source_filename, load_function=kloader.load, max_lines=max_lines)
    kwriter.sync(0.1)  # flush pending asynchronous Kafka writes
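
As in Examples #1 and #3, a check of the write promise queue could follow the sync to confirm that the queued records actually reached Kafka. These lines are not in the original step; they are shown only as a possible follow-up, appended at the end of the function:

    # optional: surface any failed Kafka writes
    promise_queue_errors = kwriter.process_write_promise_queue()
    assert not promise_queue_errors, promise_queue_errors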