예제 #1
0
    def test_validate_raise_field_not_exists_when_rename_field(self):
        """Expect FieldNotExists when a rename reads "not_exists_field"."""
        operations = TransformationOperations(CONFIG_PATH)
        validator = TransformationsValidator(operations,
                                             self.data_structure_pyspark)

        with self.assertRaises(errors.FieldNotExists):
            rename = FieldTransformation("size", "not_exists_field")
            validator.validate([rename, "dst_ip"])
예제 #2
0
    def test_validate_raise_field_not_exists_error(self):
        """Expect FieldNotExists for a plain list of field names."""
        operations = TransformationOperations(CONFIG_PATH)
        validator = TransformationsValidator(operations,
                                             self.data_structure_pyspark)

        with self.assertRaises(errors.FieldNotExists):
            # NOTE(review): "sample_rate" is presumably the unknown name
            # here -- confirm against the schema fixture.
            requested = ["src_ip", "dst_ip", "packet_size", "sample_rate"]
            validator.validate(requested)
    def test_build_lambda_processor_config(self):
        """Build a lambda from a ``config(...)`` transformation and map it.

        Parses ``a: config('input.options.port')``, validates the expanded
        transformation, builds the lambda and maps it over the CSV read from
        ``DATA_PATH``. Five rows of ``(29092, )`` are expected.
        """
        parser = TransformationsParser(["a: config('input.options.port')"])
        parser.run()

        # The value returned by validate() is not needed in this test.
        validator = TransformationsValidator(
            TransformationOperations(self.config), self.data_structure)
        validator.validate(parser.expanded_transformation)

        creator = TransformationCreator(self.data_structure,
                                        parser.expanded_transformation,
                                        TransformationOperations(self.config))
        transformation = creator.build_lambda()

        self.assertIsInstance(transformation, types.LambdaType,
                              "Transformation type should be lambda")

        spark = SparkSession.builder.getOrCreate()
        try:
            frame = spark.read.csv(DATA_PATH, self.data_structure_pyspark)
            rows = frame.rdd.map(transformation).collect()

            self.assertListEqual(rows, [(29092, )] * 5,
                                 "List of tuples should be equal")
        finally:
            # Stop the session even when the read or the assertion fails,
            # so a failing test does not leave it running.
            spark.stop()
    def test_build_lambda_numbers(self):
        """Build a lambda from a hand-made tree holding the number 13.

        The ``SyntaxTree`` is filled in directly (operation ``"_"``, single
        child ``13``) instead of being produced by the parser. Mapping the
        lambda over the CSV read from ``DATA_PATH`` is expected to give five
        rows of ``(13, )``.
        """
        tree = SyntaxTree()
        tree.operation = "_"
        tree.children = [13]  # as if it parsed

        parsed_transformations = [FieldTransformation("a", tree)]

        # The value returned by validate() is not needed in this test.
        validator = TransformationsValidator(
            TransformationOperations(self.config), self.data_structure)
        validator.validate(parsed_transformations)

        creator = TransformationCreator(self.data_structure,
                                        parsed_transformations,
                                        TransformationOperations(self.config))
        transformation = creator.build_lambda()

        self.assertIsInstance(transformation, types.LambdaType,
                              "Transformation type should be lambda")

        spark = SparkSession.builder.getOrCreate()
        try:
            frame = spark.read.csv(DATA_PATH, self.data_structure_pyspark)
            rows = frame.rdd.map(transformation).collect()

            self.assertListEqual(rows, [(13, )] * 5,
                                 "List of tuples should be equal")
        finally:
            # Stop the session even when the read or the assertion fails,
            # so a failing test does not leave it running.
            spark.stop()
예제 #5
0
    def test_validate_raise_operation_not_supported_error(self):
        """Expect OperationNotSupportedError for "not_exists_operation"."""
        operations = TransformationOperations(CONFIG_PATH)
        validator = TransformationsValidator(operations,
                                             self.data_structure_pyspark)

        unknown_op_tree = SyntaxTree()
        unknown_op_tree.operation = "not_exists_operation"

        with self.assertRaises(errors.OperationNotSupportedError):
            transformation = FieldTransformation("size", unknown_op_tree)
            validator.validate([transformation, "dst_ip"])
예제 #6
0
    def test_validate_raise_incorrect_argument_type_for_operation_error(self):
        """Expect an argument-type error for ``mul(src_ip, packet_size)``."""
        operations = TransformationOperations(CONFIG_PATH)
        validator = TransformationsValidator(operations,
                                             self.data_structure_pyspark)

        mul_tree = SyntaxTree()
        mul_tree.operation = "mul"
        mul_tree.children = ["src_ip", "packet_size"]

        with self.assertRaises(errors.IncorrectArgumentTypeForOperationError):
            transformation = FieldTransformation("traffic", mul_tree)
            validator.validate([transformation, "dst_ip"])
예제 #7
0
    def test_validate_correct_arguments_amount_for_operation_add(self):
        """``add(1, 2)`` validates to a single LongType field named "add"."""
        operations = TransformationOperations(CONFIG_PATH)
        validator = TransformationsValidator(operations,
                                             self.data_structure_pyspark)

        add_tree = SyntaxTree()
        add_tree.operation = "add"
        add_tree.children = [1, 2]

        fields = validator.validate([FieldTransformation("add", add_tree)])

        expected = types.StructType(
            [types.StructField("add", types.LongType())])
        self.assertEqual(fields, expected)
예제 #8
0
    def test_validate_raise_error_for_function_with_different_arguments_type(
            self):
        """Expect an argument-type error for ``truncate(src_ip, dst_ip)``."""
        operations = TransformationOperations(CONFIG_PATH)
        validator = TransformationsValidator(operations,
                                             self.data_structure_pyspark)

        truncate_tree = SyntaxTree()
        truncate_tree.operation = "truncate"
        truncate_tree.children = ["src_ip", "dst_ip"]

        with self.assertRaises(errors.IncorrectArgumentTypeForOperationError):
            transformation = FieldTransformation("result", truncate_tree)
            validator.validate([transformation])
예제 #9
0
    def test_validate_rename_field(self):
        """Renaming "packet_size" to "size" yields a LongType "size" field."""
        operations = TransformationOperations(CONFIG_PATH)
        validator = TransformationsValidator(operations,
                                             self.data_structure_pyspark)

        rename = FieldTransformation("size", "packet_size")
        fields = validator.validate([rename, "dst_ip"])

        expected = types.StructType([
            types.StructField('size', types.LongType()),
            types.StructField('dst_ip', types.StringType()),
        ])
        self.assertEqual(fields, expected)
예제 #10
0
    def test_validate_raise_incorrect_arguments_amount_for_operation_error(
            self):
        """Expect an arguments-amount error for ``add(1, 2, 3)``."""
        operations = TransformationOperations(CONFIG_PATH)
        validator = TransformationsValidator(operations,
                                             self.data_structure_pyspark)

        add_tree = SyntaxTree()
        add_tree.operation = "add"
        add_tree.children = [1, 2, 3]

        with self.assertRaises(
                errors.IncorrectArgumentsAmountForOperationError):
            transformation = FieldTransformation("add", add_tree)
            validator.validate([transformation, "dst_ip"])
예제 #11
0
 def test_validate_work_success(self):
     """Four plain field names validate to the matching StructType."""
     operations = TransformationOperations(CONFIG_PATH)
     validator = TransformationsValidator(operations,
                                          self.data_structure_pyspark)

     requested = ["src_ip", "dst_ip", "packet_size", "sampling_rate"]
     fields = validator.validate(requested)

     expected = types.StructType([
         types.StructField('src_ip', types.StringType()),
         types.StructField('dst_ip', types.StringType()),
         types.StructField('packet_size', types.LongType()),
         types.StructField('sampling_rate', types.LongType()),
     ])
     self.assertEqual(fields, expected, 'StructType should be equal')
예제 #12
0
    def test_validate_function_with_different_arguments_type(self):
        """``truncate(src_ip, 5)`` validates to a StringType "result" field."""
        operations = TransformationOperations(CONFIG_PATH)
        validator = TransformationsValidator(operations,
                                             self.data_structure_pyspark)

        truncate_tree = SyntaxTree()
        truncate_tree.operation = "truncate"
        truncate_tree.children = ["src_ip", 5]

        transformation = FieldTransformation("result", truncate_tree)
        fields = validator.validate([transformation])

        expected = types.StructType(
            [types.StructField("result", types.StringType())])
        self.assertEqual(fields, expected)
예제 #13
0
    def test_validate_raise_operation_not_supported_error_for_subtree(self):
        """Expect OperationNotSupportedError for an unknown nested operation.

        The outer tree is ``mul``; its first child is a subtree whose
        operation is "not_exists_operator".
        """
        operations = TransformationOperations(CONFIG_PATH)
        validator = TransformationsValidator(operations,
                                             self.data_structure_pyspark)

        inner_tree = SyntaxTree()
        inner_tree.operation = "not_exists_operator"
        inner_tree.children = ["1", "2"]

        outer_tree = SyntaxTree()
        outer_tree.operation = "mul"
        outer_tree.children = [inner_tree, "1"]

        with self.assertRaises(errors.OperationNotSupportedError):
            transformation = FieldTransformation("result", outer_tree)
            validator.validate([transformation, "dst_ip"])
예제 #14
0
    def test_validate_with_correct_one_level_subtree(self):
        """``mul(packet_size, sampling_rate)`` validates to LongType "traffic"."""
        operations = TransformationOperations(CONFIG_PATH)
        validator = TransformationsValidator(operations,
                                             self.data_structure_pyspark)

        mul_tree = SyntaxTree()
        mul_tree.operation = "mul"
        mul_tree.children = ["packet_size", "sampling_rate"]

        traffic = FieldTransformation("traffic", mul_tree)
        fields = validator.validate([traffic, "dst_ip"])

        expected = types.StructType([
            types.StructField('traffic', types.LongType()),
            types.StructField('dst_ip', types.StringType()),
        ])
        self.assertEqual(fields, expected)
예제 #15
0
    def test_validate_config_operation(self):
        """A ``config`` subtree nested in ``concat`` validates to StringType.

        The outer ``concat`` receives the ``config('input.options.port')``
        subtree and the quoted literal ``'sampling_rate'``.
        """
        operations = TransformationOperations(CONFIG_PATH)
        validator = TransformationsValidator(operations,
                                             self.data_structure_pyspark)

        config_tree = SyntaxTree()
        config_tree.operation = "config"
        config_tree.children = ["'input.options.port'"]

        concat_tree = SyntaxTree()
        concat_tree.operation = "concat"
        concat_tree.children = [config_tree, "'sampling_rate'"]

        transformation = FieldTransformation("result", concat_tree)
        fields = validator.validate([transformation])

        expected = types.StructType(
            [types.StructField('result', types.StringType())])
        self.assertEqual(fields, expected)
    def test_build_lambda_processor_add(self):
        """Build a lambda mixing ``add(...)`` calls and quoted literals.

        Two fields use ``add(-13.5, 2)`` (with and without spaces) and two
        are quoted strings that merely look like calls, one of them holding
        an escaped quote. Mapping the lambda over the CSV read from
        ``DATA_PATH`` is expected to give five identical rows.
        """
        self.maxDiff = None  # show the full diff if the row lists differ
        parser = TransformationsParser([
            "dst_ip: add(-13.5, 2)", "src_ip:add(-13.5,2)",
            "foobar: 'add(-13.5,2)'", "foobar2: 'add\\'(-13.5,2)'"
        ])
        parser.run()

        # The value returned by validate() is not needed in this test.
        validator = TransformationsValidator(
            TransformationOperations(self.config), self.data_structure)
        validator.validate(parser.expanded_transformation)

        creator = TransformationCreator(self.data_structure,
                                        parser.expanded_transformation,
                                        TransformationOperations(self.config))
        transformation = creator.build_lambda()

        self.assertIsInstance(transformation, types.LambdaType,
                              "Transformation type should be lambda")

        expected_row = (-11.5, -11.5, 'add(-13.5,2)', "add'(-13.5,2)")

        spark = SparkSession.builder.getOrCreate()
        try:
            frame = spark.read.csv(DATA_PATH, self.data_structure_pyspark)
            rows = frame.rdd.map(transformation).collect()

            self.assertListEqual(rows, [expected_row] * 5,
                                 "List of tuples should be equal")
        finally:
            # Stop the session even when the read or the assertion fails,
            # so a failing test does not leave it running.
            spark.stop()
예제 #17
0
    def test_validate_with_transformation_primitives(self):
        """``concat`` over primitive children always validates to StringType.

        One tree object is reused; only its ``children`` change between
        runs. Each run must produce a single StringType field named "foo".
        """
        operations = TransformationOperations(CONFIG_PATH)
        validator = TransformationsValidator(operations,
                                             self.data_structure_pyspark)

        concat_tree = SyntaxTree()
        concat_tree.operation = "concat"

        # NOTE(review): in "'foo\'bar'" the backslash only escapes the quote
        # for Python, so the child value is 'foo'bar' with no backslash --
        # confirm whether an escaped quote ("\\'") was intended.
        children_cases = (
            [1, 2],
            ["'1'", "'2'"],
            [1E+2, "'1'"],
            ["'foo\'bar'", "'2'"],
            ["'foo\"bar'", 2],
        )

        for children in children_cases:
            concat_tree.children = children
            fields = validator.validate(
                [FieldTransformation("foo", concat_tree)])

            expected = types.StructType(
                [types.StructField("foo", types.StringType())])
            self.assertEqual(fields, expected)