def test__init__(self, mock_sparksession, mock_analysis):
        mock_context = MagicMock()
        mock_context.addFile.return_value = "test"
        mock_spark = MagicMock()
        mock_spark.sparkContext.return_value = mock_context
        mock_builder = MagicMock()
        mock_builder.getOrCreate.return_value = mock_spark
        mock_sparksession.builder.return_value = mock_builder
        mock_analysis.return_value = None

        cfg = Config(CONFIG)

        dispatcher = Dispatcher(cfg, CONFIG)

        self.assertIsInstance(
            dispatcher.executor,
            Executor,
            "executor should have type Executor")
        self.assertTrue(
            hasattr(
                dispatcher.executor, "set_pipeline_processing"),
            "executor should have a set_pipeline_processing method")

        self.assertIsInstance(dispatcher.processor, Processor, "processor should have type Processor")
        self.assertTrue(hasattr(dispatcher.processor, "get_pipeline_processing"),
                        "processor should have a get_pipeline_processing method")

        self.assertIsInstance(dispatcher.writers[0], OutputWriter, "writer should have type OutputWriter")
        self.assertTrue(hasattr(dispatcher.writers[0], "get_write_lambda"), "writer should have a get_write_lambda method")
Example #2
    def test_write_tuple_to_influx(self):
        struct = {
            'operation_type': 'reduce',
            'rule': [{
                'key': False,
                'input_field': 'packet_size',
                'func_name': 'Min'
            }, {
                'key': False,
                'input_field': 'traffic',
                'func_name': 'Max'
            }, {
                'key': False,
                'input_field': 'traffic2',
                'func_name': 'Sum'
            }]
        }
        enumerate_output_aggregation_field = {
            "packet_size": 0,
            "traffic": 1,
            "traffic2": 2
        }
        config = Config(CONFIG_PATH)
        self.__class__.influx_options = config.content["output"]["options"][
            "influx"]

        client = InfluxDBClientMock(self.__class__.influx_options["host"],
                                    self.__class__.influx_options["port"],
                                    self.__class__.influx_options["username"],
                                    self.__class__.influx_options["password"],
                                    self.__class__.influx_options["database"])

        self.__class__.writer = InfluxWriter(
            client, self.__class__.influx_options["database"],
            self.__class__.influx_options["measurement"], struct,
            enumerate_output_aggregation_field)

        write_lambda = self.__class__.writer.get_write_lambda()
        t = (2, 3, 5)
        write_lambda(t)

        result = self.__class__.writer.client.query("select * from {0}".format(
            self.__class__.influx_options["measurement"]))
        points = list(result.get_points())

        self.assertEqual(
            len(points), 1,
            "Exactly one point should be written to the {0} measurement".format(
                self.__class__.influx_options["measurement"]))

        fields = [
            field["input_field"] for field in struct["rule"]
            if not field["key"]
        ]
        for index, name in enumerate(fields):
            self.assertEqual(points[0][name], t[index],
                             "Value should be {0}".format(t[index]))
Example #3
    def test_getExecutor(self, mock_sparksession, mock_kafka_utils):
        mock_context = MagicMock()
        mock_context.addFile.return_value = "test"
        mock_spark = MagicMock()
        mock_spark.sparkContext.return_value = mock_context
        mock_builder = MagicMock()
        mock_builder.getOrCreate.return_value = mock_spark
        mock_sparksession.builder.return_value = mock_builder
        mock_dstream = MagicMock()
        mock_dstream.map.return_value = None
        mock_kafka_utils.createDirectStream.return_value = mock_dstream

        config = Config(CONFIG_PATH)
        factory = ReadFactory(config, CONFIG_PATH)
        test_executor = factory.get_executor()

        self.assertIsInstance(
            test_executor, StreamingExecutor,
            "get_executor should return a StreamingExecutor instance"
        )
Example #4
    def test_separate_key_from_start(self):
        spark = SparkSession.builder.getOrCreate()
        sc = spark.sparkContext

        rdd = sc.parallelize([("217.69.143.60", 100, 4000),
                              ("217.69.143.60", 100, 4000),
                              ("192.168.30.2", 1500, 54000),
                              ("192.168.30.2", 200, 3000),
                              ("192.168.30.2", 200, 3000)])

        config = Config(CONFIG_PATH)
        aggregation_processor = AggregationProcessor(config, data_struct)

        separate_key = aggregation_processor._get_separate_key_lambda()
        result = separate_key(rdd)
        self.assertListEqual(result.collect(),
                             [(('217.69.143.60', ), (100, 4000)),
                              (('217.69.143.60', ), (100, 4000)),
                              (('192.168.30.2', ), (1500, 54000)),
                              (('192.168.30.2', ), (200, 3000)),
                              (('192.168.30.2', ), (200, 3000))],
                             "Lists should be equal")
Example #5
    def test_build_lambda_for_reduce_by_key(self):
        spark = SparkSession.builder.getOrCreate()
        sc = spark.sparkContext

        rdd = sc.parallelize([("217.69.143.60", 100, 4000),
                              ("217.69.143.60", 100, 4000),
                              ("192.168.30.2", 1500, 54000),
                              ("192.168.30.2", 200, 3000),
                              ("192.168.30.2", 200, 3000)])

        config = Config(CONFIG_PATH)
        aggregation_processor = AggregationProcessor(config, data_struct)

        aggregation_lambda = aggregation_processor.get_aggregation_lambda()
        result = aggregation_lambda(rdd)
        output_list = result.collect()
        output_list = sorted(output_list, key=lambda x: x[0][0])

        self.assertListEqual(output_list, [(("192.168.30.2", ), 1900, 60000),
                                           (("217.69.143.60", ), 200, 8000)],
                             "Lists should be equal")
        spark.stop()
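
For reference, a raw PySpark equivalent of the Sum aggregation the assertion above expects (not the AggregationProcessor's own implementation), reusing the rdd defined in the test:

keyed = rdd.map(lambda row: ((row[0],), (row[1], row[2])))
summed = keyed.reduceByKey(lambda a, b: (a[0] + b[0], a[1] + b[1]))
flattened = summed.map(lambda kv: (kv[0],) + kv[1])
# flattened.collect() yields (("192.168.30.2",), 1900, 60000) and
# (("217.69.143.60",), 200, 8000), matching the expected output above.
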
    def test__init__(self, mock_sparksession, mock_analysis):
        mock_context = MagicMock()
        mock_context.addFile.return_value = "test"
        mock_spark = MagicMock()
        mock_spark.sparkContext.return_value = mock_context
        mock_builder = MagicMock()
        mock_builder.getOrCreate.return_value = mock_spark
        mock_sparksession.builder.return_value = mock_builder
        mock_analysis.return_value = None
        config = Config(os.path.join(os.path.dirname(__file__), "..", "data", "config_dispatcher.json"))
        dispatcher = Dispatcher(config)

        self.assertIsInstance(dispatcher.executor, Executor, "executor should have type Executor")
        self.assertTrue(hasattr(dispatcher.executor, "set_pipeline_processing"), "executor should have a set_pipeline_processing method")

        self.assertIsInstance(dispatcher.processor, Processor, "processor should have type Processor")
        self.assertTrue(hasattr(dispatcher.processor, "get_pipeline_processing"),
                        "processor should have a get_pipeline_processing method")

        self.assertIsInstance(dispatcher.writer, OutputWriter, "writer should have type OutputWriter")
        self.assertTrue(hasattr(dispatcher.writer, "get_write_lambda"), "writer should have a get_write_lambda method")
Example #7
    def test_write_number_to_influx(self):
        struct = {
            'operation_type': 'reduce',
            'rule': [{
                'key': False,
                'input_field': 'packet_size',
                'func_name': 'Min'
            }]
        }
        enumerate_output_aggregation_field = {"packet_size": 0}
        config = Config(CONFIG_PATH)
        self.__class__.influx_options = config.content["output"]["options"][
            "influx"]

        client = InfluxDBClientMock(self.__class__.influx_options["host"],
                                    self.__class__.influx_options["port"],
                                    self.__class__.influx_options["username"],
                                    self.__class__.influx_options["password"],
                                    self.__class__.influx_options["database"])

        self.__class__.writer = InfluxWriter(
            client, self.__class__.influx_options["database"],
            self.__class__.influx_options["measurement"], struct,
            enumerate_output_aggregation_field)

        write_lambda = self.__class__.writer.get_write_lambda()
        write_lambda(6)

        result = self.__class__.writer.client.query("select * from {0}".format(
            self.__class__.influx_options["measurement"]))
        points = list(result.get_points())

        self.assertEqual(
            len(points), 1,
            "Exactly one point should be written to the {0} measurement".format(
                self.__class__.influx_options["measurement"]))

        self.assertEqual(points[0]["packet_size"], 6, "Value should be 6")
Example #8
    def test_separate_key_from_end(self):
        spark = SparkSession.builder.getOrCreate()
        sc = spark.sparkContext

        rdd = sc.parallelize([(100, 4000, "217.69.143.60"),
                              (100, 4000, "217.69.143.60"),
                              (1500, 54000, "192.168.30.2"),
                              (200, 3000, "192.168.30.2"),
                              (200, 3000, "192.168.30.2")])

        config = Config(CONFIG_PATH)
        aggregation_processor = AggregationProcessor(
            config, StructType([packet_size, traffic, src_ip]))

        separate_key = aggregation_processor._get_separate_key_lambda()
        result = separate_key(rdd)
        self.assertListEqual(result.collect(),
                             [(('217.69.143.60', ), (100, 4000)),
                              (('217.69.143.60', ), (100, 4000)),
                              (('192.168.30.2', ), (1500, 54000)),
                              (('192.168.30.2', ), (200, 3000)),
                              (('192.168.30.2', ), (200, 3000))],
                             "Lists should be equal")
Example #9
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import logging
from config_parsing.config import Config
from dispatcher.dispatcher import Dispatcher

if __name__ == "__main__":
    try:
        if len(sys.argv) != 2:
            logging.critical(
                "Invalid number of arguments\nUsage: main.py <config.json>")
            exit(1)

        path_to_config = sys.argv[1].strip()
        config = Config(path_to_config)
        dispatcher = Dispatcher(config, path_to_config)
        dispatcher.run_pipeline()
        dispatcher.stop_pipeline()
    except KeyboardInterrupt:
        logging.warning("You terminated execution.")
        exit(2)
    except BaseException as ex:
        logging.exception(ex)
        exit(1)
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import logging
from config_parsing.config import Config
from dispatcher.dispatcher import Dispatcher

if __name__ == "__main__":
    try:
        if len(sys.argv) != 2:
            logging.critical(
                "Invalid number of arguments\nUsage: main.py config.json")
            exit(1)

        config = Config(sys.argv[1].strip())
        dispatcher = Dispatcher(config)
        dispatcher.run_pipeline()
        dispatcher.stop_pipeline()
    except KeyboardInterrupt:
        logging.warning("You terminated execution.")
        exit(2)
    except BaseException as ex:
        logging.exception(ex)
        exit(1)
Example #11
    def test__number__(self):
        config = Config(CONFIG_PATH_NUM)
        p = Processor(config)
        self.assertIsInstance(
            p.transformation, types.LambdaType,
            "Processor#transformation should be a lambda object")