def feed_test_data(self):
    """Feed the experiment's input data according to self.config.input_data.

    Modes:
      * 'recommend' -- stream the data file straight to the recommendation
        requests Kafka topic through a long-running Flume agent.
      * 'test'      -- render the test-data Flume config and run the agent
        once to push test data onto the queue.
      * 'split'     -- validate the Idomaar data source and hand it to the
        evaluator's splitter.
    """
    mode = self.config.input_data
    if mode == 'recommend':
        logger.info("Test phase: data treated as recommendation requests, sending directly to topic " + self.config.recommendation_requests_topic)
        flume = FlumeConfig(base_dir=self.flume_config_base_dir, template_file_name='idomaar-TO-kafka-direct.conf')
        flume.set_value('agent.sinks.kafka_sink.topic', self.config.recommendation_requests_topic)
        flume.set_value('agent.sources.idomaar_source.fileName', self.config.data_source)
        flume.set_value('agent.sinks.kafka_sink.brokerList', self.environment.kafka_hostport)
        flume.generate()
        logger.info("Start feeding data to Flume, Kafka sink topic is {0}".format(self.config.recommendation_requests_topic))
        generated_conf = '/vagrant/flume-config/config/generated/idomaar-TO-kafka-direct.conf'
        log4j_dir = '/vagrant/flume-config/log4j/test'
        feed_command = "/opt/apache/flume/bin/flume-ng agent --conf {flume_log_conf_dir} --name agent --conf-file {flume_conf_file}".format(
            flume_conf_file=generated_conf, flume_log_conf_dir=log4j_dir)
        self.executor.start_on_data_stream_manager(command=feed_command, process_name="to-kafka-flume")
    elif mode == 'test':
        logger.info("Test phase: Input data treated as test data.")
        self.create_flume_config('idomaar-TO-kafka-test.conf')
        logger.info("Start feeding test data to queue")
        ## TODO CURRENTLY WE ARE TESTING ONLY "FILE" TYPE, WE NEED TO BE ABLE TO CONFIGURE A TEST OF TYPE STREAMING
        feed_command = ("/opt/apache/flume/bin/flume-ng agent --conf /vagrant/flume-config/log4j/test --name a1 --conf-file /vagrant/flume-config/config/generated/idomaar-TO-kafka-test.conf -Didomaar.url="
                        + self.config.test_uri + " -Didomaar.sourceType=file")
        self.executor.run_on_data_stream_manager(feed_command)
    elif mode == 'split':
        logger.info("Test phase: input data treated as test data.")
        source = IdomaarDataSource(self.config.data_source)
        source.check()
        self.evaluator_proxy.start_splitter(source)
def create_flume_config(self, template_file_name):
    """Render a Flume config from *template_file_name*, wiring the data and
    recommendation-request sink topics from the experiment configuration.

    The generated file is written under the same name as the template.
    """
    flume = FlumeConfig(base_dir=self.flume_config_base_dir, template_file_name=template_file_name)
    topic_settings = (
        ('a1.sinks.kafka_data.topic', self.config.data_topic),
        ('a1.sinks.kafka_rec.topic', self.config.recommendation_requests_topic),
    )
    for key, value in topic_settings:
        flume.set_value(key, value)
    flume.generate(template_file_name)
def start_input_feed(self):
    """Start a Flume agent that streams the configured data source file into
    the data splitter's input Kafka topic."""
    logger.info("Data is sent to data splitter via Kafka topic " + self.config.input_topic)
    flume = FlumeConfig(base_dir=self.flume_config_base_dir, template_file_name='idomaar-TO-kafka-direct.conf')
    flume.set_value('agent.sinks.kafka_sink.topic', self.config.input_topic)
    flume.set_value('agent.sources.idomaar_source.fileName', self.config.data_source)
    # NOTE(review): no kafka_sink.brokerList is set here (unlike feed_test_data's
    # 'recommend' branch) -- presumably the template default applies; confirm.
    flume.generate()
    logger.info("Start feeding data to Flume, Kafka sink topic is {0}".format(self.config.input_topic))
    feed_command = "/opt/apache/flume/bin/flume-ng agent --conf /vagrant/flume-config/log4j/test --name agent --conf-file /vagrant/flume-config/config/generated/idomaar-TO-kafka-direct.conf"
    self.executor.start_on_data_stream_manager(command=feed_command, process_name="to-kafka-flume")
def feed_test_data(self):
    """Feed the experiment's input data according to self.config.input_data.

    Modes:
      * 'recommend' -- stream the data file straight to the recommendation
        requests Kafka topic through a long-running Flume agent.
      * 'test'      -- render the test-data Flume config and run the agent
        once to push test data onto the queue.
      * 'split'     -- check the input file exists under /vagrant/input/ and
        hand its location to the evaluator's splitter.
    """
    mode = self.config.input_data
    if mode == "recommend":
        logger.info(
            "Data treated as recommendation requests, sending directly to topic "
            + self.config.recommendation_requests_topic
        )
        flume = FlumeConfig(base_dir=self.flume_config_base_dir, template_file_name="idomaar-TO-kafka-direct.conf")
        flume.set_value("agent.sinks.kafka_sink.topic", self.config.recommendation_requests_topic)
        flume.set_value("agent.sources.idomaar_source.fileName", self.config.data_source)
        flume.generate()
        logger.info(
            "Start feeding data to Flume, Kafka sink topic is {0}".format(self.config.recommendation_requests_topic)
        )
        feed_command = "flume-ng agent --conf /vagrant/flume-config/log4j/test --name agent --conf-file /vagrant/flume-config/config/generated/idomaar-TO-kafka-direct.conf"
        self.executor.start_on_data_stream_manager(command=feed_command, process_name="to-kafka-flume")
    elif mode == "test":
        self.create_flume_config("idomaar-TO-kafka-test.conf")
        logger.info("Start feeding test data to queue")
        ## TODO CURRENTLY WE ARE TESTING ONLY "FILE" TYPE, WE NEED TO BE ABLE TO CONFIGURE A TEST OF TYPE STREAMING
        feed_command = (
            "flume-ng agent --conf /vagrant/flume-config/log4j/test --name a1 --conf-file /vagrant/flume-config/config/generated/idomaar-TO-kafka-test.conf -Didomaar.url="
            + self.config.test_uri
            + " -Didomaar.sourceType=file"
        )
        self.executor.run_on_data_stream_manager(feed_command)
    elif mode == "split":
        input_file_location = "/vagrant/input/" + self.config.data_source
        self.check_exists(input_file_location)
        self.evaluator_proxy.start_splitter(input_file_location)
def start_input_feed(self):
    """Start a Flume agent that streams the configured data source file into
    the data splitter's input Kafka topic."""
    logger.info("Data is sent to data splitter via Kafka topic " + self.config.input_topic)
    flume = FlumeConfig(base_dir=self.flume_config_base_dir, template_file_name="idomaar-TO-kafka-direct.conf")
    flume.set_value("agent.sinks.kafka_sink.topic", self.config.input_topic)
    flume.set_value("agent.sources.idomaar_source.fileName", self.config.data_source)
    flume.generate()
    logger.info("Start feeding data to Flume, Kafka sink topic is {0}".format(self.config.input_topic))
    # Runs until explicitly stopped; tracked by the executor under a stable process name.
    feed_command = "flume-ng agent --conf /vagrant/flume-config/log4j/test --name agent --conf-file /vagrant/flume-config/config/generated/idomaar-TO-kafka-direct.conf"
    self.executor.start_on_data_stream_manager(command=feed_command, process_name="to-kafka-flume")
def feed_file_to_topic(self, data_source, topic_name, conf_file_suffix, kafka_hostport):
    """Generate a per-feed Flume config for *data_source* and start an agent
    pushing it to *topic_name* on the given Kafka broker list.

    *conf_file_suffix* keeps the generated config file and the Flume channel
    directories unique, so several feeds can run side by side.
    """
    logger.info("{file} is sent to Kafka topic {topic} ".format(file=data_source, topic=topic_name))
    config_file_name = 'idomaar-TO-kafka-direct-' + conf_file_suffix + '.conf'
    flume = FlumeConfig(base_dir=self.flume_config_base_dir, template_file_name='idomaar-TO-kafka-direct.conf')
    flume.set_value('agent.sinks.kafka_sink.topic', topic_name)
    flume.set_value('agent.sinks.kafka_sink.brokerList', kafka_hostport)
    # Absent (falsy) source attributes are written out as the literal string 'None'.
    flume.set_value('agent.sources.idomaar_source.url', data_source.url or 'None')
    flume.set_value('agent.sources.idomaar_source.fileName', data_source.file_name or 'None')
    flume.set_value('agent.sources.idomaar_source.format', data_source.format)
    channel_root = '/tmp/' + config_file_name
    flume.set_value('agent.channels.channel.checkpointDir', channel_root + '/flume_data_checkpoint')
    flume.set_value('agent.channels.channel.dataDirs', channel_root + '/flume_data')
    flume.generate(output_file_name=config_file_name)
    feed_command = "/opt/apache/flume/bin/flume-ng agent --conf /vagrant/flume-config/log4j/test --name agent --conf-file /vagrant/flume-config/config/generated/" + config_file_name
    self.executor.start_on_data_stream_manager(command=feed_command, process_name="to-topic-" + conf_file_suffix)
def create_flume_config(self, template_file_name):
    """Render a Flume config from *template_file_name*, wiring the data and
    recommendation-request sink topics from the experiment configuration.

    The generated file is written under the same name as the template.
    """
    flume = FlumeConfig(base_dir=self.flume_config_base_dir, template_file_name=template_file_name)
    flume.set_value("a1.sinks.kafka_data.topic", self.config.data_topic)
    flume.set_value("a1.sinks.kafka_rec.topic", self.config.recommendation_requests_topic)
    flume.generate(template_file_name)
def feed_file_to_topic(self, data_source, topic_name, conf_file_suffix, kafka_hostport):
    """Generate a per-feed Flume config for *data_source* and start an agent
    pushing it to *topic_name* on the given Kafka broker list.

    *conf_file_suffix* keeps the generated config file and the Flume channel
    directories unique, so several feeds can run side by side. Source url and
    fileName are only written when present on the data source; otherwise the
    template's values (if any) are left untouched.
    """
    logger.info("{file} is sent to Kafka topic {topic} ".format(file=data_source, topic=topic_name))
    config_file_name = 'idomaar-TO-kafka-direct-' + conf_file_suffix + '.conf'
    flume = FlumeConfig(base_dir=self.flume_config_base_dir, template_file_name='idomaar-TO-kafka-direct.conf')
    flume.set_value('agent.sinks.kafka_sink.topic', topic_name)
    flume.set_value('agent.sinks.kafka_sink.brokerList', kafka_hostport)
    if data_source.url:
        flume.set_value('agent.sources.idomaar_source.url', data_source.url)
    if data_source.file_name:
        flume.set_value('agent.sources.idomaar_source.fileName', data_source.file_name)
    flume.set_value('agent.sources.idomaar_source.format', data_source.format)
    channel_root = '/tmp/' + config_file_name
    flume.set_value('agent.channels.channel.checkpointDir', channel_root + '/flume_data_checkpoint')
    flume.set_value('agent.channels.channel.dataDirs', channel_root + '/flume_data')
    flume.generate(output_file_name=config_file_name)
    feed_command = "/opt/apache/flume/bin/flume-ng agent --conf /vagrant/flume-config/log4j/test --name agent --conf-file /vagrant/flume-config/config/generated/" + config_file_name
    self.executor.start_on_data_stream_manager(command=feed_command, process_name="to-topic-" + conf_file_suffix)
def feed_file_to_topic(self, input_file_name, topic_name, conf_file_suffix, file_format):
    """Generate a per-feed Flume config for *input_file_name* and start an
    agent pushing it to *topic_name* in the given *file_format*.

    *conf_file_suffix* keeps the generated config file and the Flume channel
    directories unique, so several feeds can run side by side.
    """
    logger.info("{file} is sent to Kafka topic {topic} ".format(file=input_file_name, topic=topic_name))
    config_file_name = "idomaar-TO-kafka-direct-" + conf_file_suffix + ".conf"
    flume = FlumeConfig(base_dir=self.flume_config_base_dir, template_file_name="idomaar-TO-kafka-direct.conf")
    channel_root = "/tmp/" + config_file_name
    settings = (
        ("agent.sinks.kafka_sink.topic", topic_name),
        ("agent.sources.idomaar_source.fileName", input_file_name),
        ("agent.sources.idomaar_source.format", file_format),
        ("agent.channels.channel.checkpointDir", channel_root + "/flume_data_checkpoint"),
        ("agent.channels.channel.dataDirs", channel_root + "/flume_data"),
    )
    for key, value in settings:
        flume.set_value(key, value)
    flume.generate(output_file_name=config_file_name)
    feed_command = (
        "/opt/apache/flume/bin/flume-ng agent --conf /vagrant/flume-config/log4j/test --name agent --conf-file /vagrant/flume-config/config/generated/"
        + config_file_name
    )
    self.executor.start_on_data_stream_manager(
        command=feed_command, process_name="to-topic-" + conf_file_suffix
    )
def create_flume_config(self, template_file_name):
    """Render a Flume config from *template_file_name*, wiring both Kafka
    sinks (data and recommendation requests) with their topics and the
    environment's Kafka broker list.

    The generated file is written under the same name as the template.
    """
    flume = FlumeConfig(base_dir=self.flume_config_base_dir, template_file_name=template_file_name)
    brokers = self.environment.kafka_hostport
    settings = (
        ('a1.sinks.kafka_data.topic', self.config.data_topic),
        ('a1.sinks.kafka_rec.topic', self.config.recommendation_requests_topic),
        ('a1.sinks.kafka_rec.brokerList', brokers),
        ('a1.sinks.kafka_data.brokerList', brokers),
    )
    for key, value in settings:
        flume.set_value(key, value)
    flume.generate(template_file_name)