def test__init__(self, mock_sparksession, mock_analysis):
    mock_context = MagicMock()
    mock_context.addFile.return_value = "test"
    mock_spark = MagicMock()
    mock_spark.sparkContext.return_value = mock_context
    mock_builder = MagicMock()
    mock_builder.getOrCreate.return_value = mock_spark
    mock_sparksession.builder.return_value = mock_builder
    mock_analysis.return_value = None

    cfg = Config(CONFIG)
    dispatcher = Dispatcher(cfg, CONFIG)

    self.assertIsInstance(dispatcher.executor, Executor,
                          "executor should have type Executor")
    self.assertTrue(
        hasattr(dispatcher.executor, "set_pipeline_processing"),
        "executor should have a set_pipeline_processing method")
    self.assertIsInstance(dispatcher.processor, Processor,
                          "processor should have type Processor")
    self.assertTrue(
        hasattr(dispatcher.processor, "get_pipeline_processing"),
        "processor should have a get_pipeline_processing method")
    self.assertIsInstance(dispatcher.writers[0], OutputWriter,
                          "writer should have type OutputWriter")
    self.assertTrue(
        hasattr(dispatcher.writers[0], "get_write_lambda"),
        "writer should have a get_write_lambda method")
def test_write_tuple_to_influx(self):
    struct = {
        'operation_type': 'reduce',
        'rule': [{
            'key': False,
            'input_field': 'packet_size',
            'func_name': 'Min'
        }, {
            'key': False,
            'input_field': 'traffic',
            'func_name': 'Max'
        }, {
            'key': False,
            'input_field': 'traffic2',
            'func_name': 'Sum'
        }]
    }
    enumerate_output_aggregation_field = {
        "packet_size": 0,
        "traffic": 1,
        "traffic2": 2
    }
    config = Config(CONFIG_PATH)
    self.__class__.influx_options = config.content["output"]["options"][
        "influx"]
    client = InfluxDBClientMock(self.__class__.influx_options["host"],
                                self.__class__.influx_options["port"],
                                self.__class__.influx_options["username"],
                                self.__class__.influx_options["password"],
                                self.__class__.influx_options["database"])
    self.__class__.writer = InfluxWriter(
        client, self.__class__.influx_options["database"],
        self.__class__.influx_options["measurement"], struct,
        enumerate_output_aggregation_field)

    write_lambda = self.__class__.writer.get_write_lambda()
    t = (2, 3, 5)
    write_lambda(t)

    result = self.__class__.writer.client.query(
        "select * from {0}".format(
            self.__class__.influx_options["measurement"]))
    points = list(result.get_points())
    self.assertEqual(
        len(points), 1,
        "One point should be written to the {0} measurement".format(
            self.__class__.influx_options["measurement"]))

    fields = [
        field["input_field"] for field in struct["rule"]
        if not field["key"]
    ]
    for index, name in enumerate(fields):
        self.assertEqual(points[0][name], t[index],
                         "Value should be {0}".format(t[index]))
def test_getExecutor(self, mock_sparksession, mock_kafka_utils):
    mock_context = MagicMock()
    mock_context.addFile.return_value = "test"
    mock_spark = MagicMock()
    mock_spark.sparkContext.return_value = mock_context
    mock_builder = MagicMock()
    mock_builder.getOrCreate.return_value = mock_spark
    mock_sparksession.builder.return_value = mock_builder
    mock_dstream = MagicMock()
    mock_dstream.map.return_value = None
    mock_kafka_utils.createDirectStream.return_value = mock_dstream

    config = Config(CONFIG_PATH)
    factory = ReadFactory(config, CONFIG_PATH)
    test_executor = factory.get_executor()

    self.assertIsInstance(
        test_executor, StreamingExecutor,
        "For a streaming input the factory should return a StreamingExecutor")
def test_separate_key_from_start(self):
    spark = SparkSession.builder.getOrCreate()
    sc = spark.sparkContext
    rdd = sc.parallelize([("217.69.143.60", 100, 4000),
                          ("217.69.143.60", 100, 4000),
                          ("192.168.30.2", 1500, 54000),
                          ("192.168.30.2", 200, 3000),
                          ("192.168.30.2", 200, 3000)])
    config = Config(CONFIG_PATH)
    aggregation_processor = AggregationProcessor(config, data_struct)
    separate_key = aggregation_processor._get_separate_key_lambda()
    result = separate_key(rdd)
    self.assertListEqual(result.collect(),
                         [(('217.69.143.60', ), (100, 4000)),
                          (('217.69.143.60', ), (100, 4000)),
                          (('192.168.30.2', ), (1500, 54000)),
                          (('192.168.30.2', ), (200, 3000)),
                          (('192.168.30.2', ), (200, 3000))],
                         "Lists should be equal")
def test_build_lambda_for_reduce_by_key(self):
    spark = SparkSession.builder.getOrCreate()
    sc = spark.sparkContext
    rdd = sc.parallelize([("217.69.143.60", 100, 4000),
                          ("217.69.143.60", 100, 4000),
                          ("192.168.30.2", 1500, 54000),
                          ("192.168.30.2", 200, 3000),
                          ("192.168.30.2", 200, 3000)])
    config = Config(CONFIG_PATH)
    aggregation_processor = AggregationProcessor(config, data_struct)
    aggregation_lambda = aggregation_processor.get_aggregation_lambda()
    result = aggregation_lambda(rdd)
    output_list = result.collect()
    output_list = sorted(output_list, key=lambda x: x[0][0])
    self.assertListEqual(output_list,
                         [(("192.168.30.2", ), 1900, 60000),
                          (("217.69.143.60", ), 200, 8000)],
                         "Lists should be equal")
    spark.stop()
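# A minimal pure-Python sketch of the aggregation that
# test_build_lambda_for_reduce_by_key checks, assuming the configured rule
# sums every non-key field per key tuple (which matches the expected totals).
# The helper name sum_by_key is illustrative and not part of the project API.
def sum_by_key(rows):
    # rows are (src_ip, packet_size, traffic); the key is the source IP.
    totals = {}
    for src_ip, packet_size, traffic in rows:
        key = (src_ip, )
        prev_size, prev_traffic = totals.get(key, (0, 0))
        totals[key] = (prev_size + packet_size, prev_traffic + traffic)
    return [(key, packet_size, traffic)
            for key, (packet_size, traffic) in totals.items()]


# sum_by_key of the five rows above, sorted by key, yields
# [(('192.168.30.2',), 1900, 60000), (('217.69.143.60',), 200, 8000)],
# which is exactly the list the assertion expects.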
def test__init__(self, mock_sparksession, mock_analysis):
    mock_context = MagicMock()
    mock_context.addFile.return_value = "test"
    mock_spark = MagicMock()
    mock_spark.sparkContext.return_value = mock_context
    mock_builder = MagicMock()
    mock_builder.getOrCreate.return_value = mock_spark
    mock_sparksession.builder.return_value = mock_builder
    mock_analysis.return_value = None

    config = Config(
        os.path.join(
            os.path.dirname(__file__), "..", "data",
            "config_dispatcher.json"))
    dispatcher = Dispatcher(config)

    self.assertIsInstance(dispatcher.executor, Executor,
                          "executor should have type Executor")
    self.assertTrue(
        hasattr(dispatcher.executor, "set_pipeline_processing"),
        "executor should have a set_pipeline_processing method")
    self.assertIsInstance(dispatcher.processor, Processor,
                          "processor should have type Processor")
    self.assertTrue(
        hasattr(dispatcher.processor, "get_pipeline_processing"),
        "processor should have a get_pipeline_processing method")
    self.assertIsInstance(dispatcher.writer, OutputWriter,
                          "writer should have type OutputWriter")
    self.assertTrue(
        hasattr(dispatcher.writer, "get_write_lambda"),
        "writer should have a get_write_lambda method")
def test_write_number_to_influx(self):
    struct = {
        'operation_type': 'reduce',
        'rule': [{
            'key': False,
            'input_field': 'packet_size',
            'func_name': 'Min'
        }]
    }
    enumerate_output_aggregation_field = {"packet_size": 0}
    config = Config(CONFIG_PATH)
    self.__class__.influx_options = config.content["output"]["options"][
        "influx"]
    client = InfluxDBClientMock(self.__class__.influx_options["host"],
                                self.__class__.influx_options["port"],
                                self.__class__.influx_options["username"],
                                self.__class__.influx_options["password"],
                                self.__class__.influx_options["database"])
    self.__class__.writer = InfluxWriter(
        client, self.__class__.influx_options["database"],
        self.__class__.influx_options["measurement"], struct,
        enumerate_output_aggregation_field)

    write_lambda = self.__class__.writer.get_write_lambda()
    write_lambda(6)

    result = self.__class__.writer.client.query(
        "select * from {0}".format(
            self.__class__.influx_options["measurement"]))
    points = list(result.get_points())
    self.assertEqual(
        len(points), 1,
        "One point should be written to the {0} measurement".format(
            self.__class__.influx_options["measurement"]))
    self.assertEqual(points[0]["packet_size"], 6, "Value should be 6")
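# A minimal sketch of the mapping exercised by test_write_tuple_to_influx and
# test_write_number_to_influx, assuming the write lambda builds one InfluxDB
# point whose fields come from enumerate_output_aggregation_field
# (field name -> position in the aggregated value). The helper name
# make_point is illustrative, not the InfluxWriter API.
def make_point(measurement, enumerate_output_aggregation_field, value):
    # A bare number covers the single-field case, a tuple the multi-field one.
    values = value if isinstance(value, tuple) else (value, )
    fields = {
        name: values[index]
        for name, index in enumerate_output_aggregation_field.items()
    }
    return {"measurement": measurement, "fields": fields}


# make_point("m", {"packet_size": 0}, 6)
#   -> {'measurement': 'm', 'fields': {'packet_size': 6}}
# make_point("m", {"packet_size": 0, "traffic": 1, "traffic2": 2}, (2, 3, 5))
#   -> fields {'packet_size': 2, 'traffic': 3, 'traffic2': 5}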
def test_separate_key_from_end(self):
    spark = SparkSession.builder.getOrCreate()
    sc = spark.sparkContext
    rdd = sc.parallelize([(100, 4000, "217.69.143.60"),
                          (100, 4000, "217.69.143.60"),
                          (1500, 54000, "192.168.30.2"),
                          (200, 3000, "192.168.30.2"),
                          (200, 3000, "192.168.30.2")])
    config = Config(CONFIG_PATH)
    aggregation_processor = AggregationProcessor(
        config, StructType([packet_size, traffic, src_ip]))
    separate_key = aggregation_processor._get_separate_key_lambda()
    result = separate_key(rdd)
    self.assertListEqual(result.collect(),
                         [(('217.69.143.60', ), (100, 4000)),
                          (('217.69.143.60', ), (100, 4000)),
                          (('192.168.30.2', ), (1500, 54000)),
                          (('192.168.30.2', ), (200, 3000)),
                          (('192.168.30.2', ), (200, 3000))],
                         "Lists should be equal")
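# A minimal pure-Python sketch of the key-separation step checked in
# test_separate_key_from_start and test_separate_key_from_end, assuming
# src_ip is the only key field. The names separate_key_rows and key_index
# are illustrative, not the AggregationProcessor API.
def separate_key_rows(rows, key_index):
    # Pull the key column out of each row and emit ((key,), (other fields)).
    return [((row[key_index], ),
             tuple(v for i, v in enumerate(row) if i != key_index))
            for row in rows]


# separate_key_rows([(100, 4000, "217.69.143.60")], key_index=2)
#   -> [(('217.69.143.60',), (100, 4000))]
# separate_key_rows([("217.69.143.60", 100, 4000)], key_index=0)
#   -> the same pairs, matching the key-at-start test.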
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import logging

from config_parsing.config import Config
from dispatcher.dispatcher import Dispatcher

if __name__ == "__main__":
    try:
        if len(sys.argv) != 2:
            logging.critical(
                "Invalid number of arguments\nUsage: main.py <config.json>")
            exit(1)

        path_to_config = sys.argv[1].strip()
        config = Config(path_to_config)
        dispatcher = Dispatcher(config, path_to_config)
        dispatcher.run_pipeline()
        dispatcher.stop_pipeline()
    except KeyboardInterrupt:
        logging.warning("You terminated execution.")
        exit(2)
    except BaseException as ex:
        logging.exception(ex)
        exit(1)
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import logging

from config_parsing.config import Config
from dispatcher.dispatcher import Dispatcher

if __name__ == "__main__":
    try:
        if len(sys.argv) != 2:
            logging.critical(
                "Invalid number of arguments\nUsage: main.py config.json")
            exit(1)

        config = Config(sys.argv[1].strip())
        dispatcher = Dispatcher(config)
        dispatcher.run_pipeline()
        dispatcher.stop_pipeline()
    except KeyboardInterrupt:
        logging.warning("You terminated execution.")
        exit(2)
    except BaseException as ex:
        logging.exception(ex)
        exit(1)
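# Example invocation, mirroring the usage string above. Running the script
# directly works as written; launching it through spark-submit is only an
# assumption about how the deployment is done:
#
#   python main.py config.json
#   spark-submit main.py config.json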
def test__number__(self):
    config = Config(CONFIG_PATH_NUM)
    p = Processor(config)
    self.assertIsInstance(
        p.transformation, types.LambdaType,
        "Processor#transformation should be a lambda object")