def given_we_load_data_from_the_source_file_to_the_source_topic(step):
    topic = world.pipeline_config.raw_topic
    test_context = world.test_environment.load_context(EXTRACT_TRANSFORM_CONSUME_SCENARIO)
    knodes = world.pipeline_config.cluster.node_array
    kwriter = telegraf.KafkaIngestRecordWriter(knodes)
    kloader = telegraf.KafkaLoader(topic,
                                   kwriter,
                                   record_type='direct_sales_record',
                                   stream_id='test_stream_id',
                                   asset_id='test_asset_id')
    wloader = WorldLoader(topic,
                          kwriter,
                          record_type='direct_sales_record',
                          stream_id='test_stream_id',
                          asset_id='test_asset_id')

    processor = dmap.WhitespaceCleanupProcessor()
    extractor = dmap.CSVFileDataExtractor(processor, delimiter='|', quotechar='"')
    source_filename = world.pipeline_config.get_file_reference(test_context.test_file_alias)

    # extract twice: once into the in-memory WorldLoader, so the scenario can
    # inspect what was read, and once into the Kafka loader, the write under test
    extractor.extract(source_filename, load_function=wloader.load)
    extractor.extract(source_filename, load_function=kloader.load)
    kwriter.sync(0.1)

    # stash the raw records and any failed write promises on the scenario context
    for rec in wloader.record_list:
        test_context.loaded_raw_record_list.append(rec)
    test_context.promise_queue_errors = kwriter.process_write_promise_queue()
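
# WorldLoader is referenced above but not defined in this section. A minimal
# sketch, assuming it mirrors the KafkaLoader constructor signature and simply
# captures records in memory instead of writing them to Kafka (the attribute
# names below, other than record_list, are assumptions):

class WorldLoader(object):
    def __init__(self, topic, kafka_ingest_record_writer, **kwargs):
        self.topic = topic
        self.writer = kafka_ingest_record_writer
        self.metadata = kwargs      # record_type / stream_id / asset_id tags
        self.record_list = []       # records captured for later comparison

    def load(self, record):
        # satisfies the load_function contract CSVFileDataExtractor.extract expects
        self.record_list.append(record)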
def load_test_data(self, filename, tablespec):
    cproc = dmap.ConsoleProcessor()
    # this line was commented out, which left insert_proc undefined at the
    # extractor call below; restored here (db must be a live handle in scope)
    insert_proc = dmap.SQLTableInsertProcessor(db, self.insert_test_record, cproc)
    extractor = dmap.CSVFileDataExtractor(insert_proc, quotechar='"', delimiter='|')
    extractor.extract(filename)
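
# A sketch of the insert_test_record callback handed to SQLTableInsertProcessor
# above. The callback signature and the DB-API style execute call are
# assumptions; the actual contract lives in dmap.SQLTableInsertProcessor:

def insert_test_record(self, record, db_connection):
    # build a parameterized INSERT from the record's fields (illustrative only;
    # the table name would come from the tablespec in a real run)
    columns = ', '.join(record.keys())
    placeholders = ', '.join(['%s'] * len(record))
    statement = 'INSERT INTO test_table (%s) VALUES (%s)' % (columns, placeholders)
    db_connection.cursor().execute(statement, list(record.values()))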
def test_loader_can_send_records(self):
    kloader = tg.KafkaLoader(self.target_topic,
                             self.kwriter,
                             pipeline_id='mx_test_pipeline',
                             record_type='mx_test_record')
    local_filename = 'data/sample_objectstore.csv'
    # no field processor needed here; pass None to forward records unmodified
    # processor = dmap.WhitespaceCleanupProcessor()
    extractor = dmap.CSVFileDataExtractor(None, delimiter='|', quotechar='"')
    extractor.extract(local_filename, load_function=kloader.load)
    self.kwriter.sync(0.1)

    # records were queued for writing, and no write promise came back with an error
    self.assertTrue(self.kwriter.promise_queue_size > 0)
    self.assertEqual(len(self.kwriter.process_write_promise_queue()), 0)
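
# The test above relies on self.kwriter and self.target_topic coming from the
# fixture. A minimal setUp sketch, assuming the same KafkaIngestRecordWriter
# construction used in the step definitions above; the KafkaNode helper, host,
# and topic name are placeholder assumptions:

def setUp(self):
    knodes = [tg.KafkaNode('localhost')]
    self.kwriter = tg.KafkaIngestRecordWriter(knodes)
    self.target_topic = 'mx_test_topic'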
def when_we_load_n_records_from_the_source_file(step, max_line_count):
    test_context = world.test_environment.load_context(DATA_COMMIT_SCENARIO)
    max_lines = int(max_line_count)  # lettuce passes captured groups as strings
    topic = world.pipeline_config.get_user_topic('scratch_topic')
    knodes = world.pipeline_config.cluster.node_array
    kwriter = telegraf.KafkaIngestRecordWriter(knodes)
    kloader = telegraf.KafkaLoader(topic,
                                   kwriter,
                                   record_type='direct_sales_record',
                                   stream_id='test_stream_id',
                                   asset_id='test_asset_id')
    processor = dmap.WhitespaceCleanupProcessor()
    extractor = dmap.CSVFileDataExtractor(processor, delimiter='|', quotechar='"')
    source_filename = world.pipeline_config.get_file_reference(test_context.test_file_alias)

    # cap the extraction at the record count named in the scenario step
    extractor.extract(source_filename, load_function=kloader.load, max_lines=max_lines)
    kwriter.sync(0.1)
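
# Step functions like the one above are bound to scenario text via lettuce's
# @step decorator, with regex groups supplying the extra arguments. A sketch of
# the binding (the exact step sentence is an assumption; the decorator API is
# standard lettuce):

from lettuce import step

step(r'we load (\d+) records from the source file')(
    when_we_load_n_records_from_the_source_file)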