Ejemplo n.º 1
0
    def test_run(self):
        """Run the parser on the configured transformations and inspect the expansion.

        Position 1 of the expanded transformation must be the plain field
        name 'dst_ip'. Positions 0, 2 and 3 must be FieldTransformation
        objects whose ``name`` and ``body`` type match the corresponding
        entries of ``stub['run_test']``.
        """
        parser_config = TransformationsParserConfig(CONFIG_PATH)
        parser = TransformationsParser(
            parser_config.content["processing"]["transformation"])
        parser.run()

        # NOTE(review): the total number of expanded elements is not asserted
        # here; an earlier check for exactly 5 elements was disabled. Confirm
        # whether it should be restored.

        self.assertEqual(
            parser.expanded_transformation[1], 'dst_ip',
            "2 element in expanded transformation should be 'dst_ip'")

        for index in (0, 2, 3):
            transformation = parser.expanded_transformation[index]
            self.assertIsInstance(
                transformation, FieldTransformation,
                "{} element expanded transformation should has FieldTransformation type"
                .format(index))

            expected = stub['run_test'][index]
            expected_name = expected['field_name']
            self.assertEqual(
                transformation.name,
                expected_name,
                "expanded_transformation[{}].field_name should be {}".format(
                    index, expected_name))

            expected_type = expected["type"]
            self.assertIsInstance(
                transformation.body,
                expected_type,
                'expanded_transformation[{}].operation should be instance of {}'
                .format(index, expected_type))
    def test__parse_field(self):
        """Parsing a bare field name must return that name unchanged, as a str."""
        parser_config = TransformationsParserConfig(CONFIG_PATH)
        transformation_spec = parser_config.content["processing"]["transformation"]
        parser = TransformationsParser(transformation_spec)

        parsed = parser._parse("sample_rating")

        self.assertIsInstance(
            parsed, str, "Result should be instance of string")
        self.assertEqual(
            parsed, "sample_rating",
            "Value this leaf node should be 'sample_rating'")
Ejemplo n.º 3
0
    def test__parse_raise_incorrect_expression_error(self):
        """An expression with unbalanced parentheses must raise IncorrectExpression."""
        parser_config = TransformationsParserConfig(CONFIG_PATH)
        transformation_spec = parser_config.content["processing"]["transformation"]
        parser = TransformationsParser(transformation_spec)

        # "add((1,2)" opens two parentheses but closes only one.
        self.assertRaises(
            errors.IncorrectExpression, parser._parse, "add((1,2)", True)
    def test_build_lambda_processor_config(self):
        """Build a lambda from a ``config(...)`` transformation and apply it to the CSV data.

        The transformation ``a: config('input.options.port')`` is parsed,
        validated and compiled into a lambda, which is then mapped over the
        rows read from DATA_PATH. Every one of the five resulting rows is
        expected to be the one-element tuple ``(29092,)``.
        """
        parser = TransformationsParser(["a: config('input.options.port')"])
        parser.run()
        operations = TransformationOperations(self.config)

        transformations_validator = TransformationsValidator(
            operations, self.data_structure)

        # The return value is not used; validate() is called so that any
        # exception it raises fails the test before the lambda is built.
        transformations_validator.validate(parser.expanded_transformation)
        creator = TransformationCreator(self.data_structure,
                                        parser.expanded_transformation,
                                        TransformationOperations(self.config))

        transformation = creator.build_lambda()

        self.assertIsInstance(transformation, types.LambdaType,
                              "Transformation type should be lambda")

        spark = SparkSession.builder.getOrCreate()
        try:
            data_frame = spark.read.csv(DATA_PATH, self.data_structure_pyspark)
            result = data_frame.rdd.map(transformation).collect()

            self.assertListEqual(result, [(29092, )] * 5,
                                 "List of tuples should be equal")
        finally:
            # Stop the session even when reading, collecting or the assertion
            # fails, so a failing test does not leave Spark running.
            spark.stop()
    def test__parse_nested_operations(self):
        """Parse ``minus(mult(..), mult(.., sum(..)))`` and verify each level of the tree.

        The expression is assembled from ``stub["first_mult"]`` and
        ``stub["second_mult"]``; the test then walks the resulting SyntaxTree
        and checks operation names, child counts and leaf values.
        """
        parser_config = TransformationsParserConfig(CONFIG_PATH)
        parser = TransformationsParser(parser_config.content["processing"]["transformation"])

        first_mult_args = ",".join(stub["first_mult"])
        second_mult_head = stub["second_mult"][0]
        sum_args = ",".join(stub["second_mult"][1])
        expression = "minus(mult({}),mult({},sum({})))".format(
            first_mult_args, second_mult_head, sum_args)

        root = parser._parse(expression)
        self.assertIsInstance(root, SyntaxTree, "Result should be instance of SyntaxTree")
        self.assertEqual(root.operation, "minus", "Operation should be 'minus'")
        self.assertEqual(len(root.children), 2, "Should have 2 children")

        # Left operand of minus: the flat mult(...) with two string leaves.
        left = root.children[0]
        self.assertIsInstance(left, SyntaxTree, "Result should be instance of SyntaxTree")
        self.assertEqual(left.operation, "mult", "Operation should be 'mult'")
        self.assertEqual(len(left.children), 2, "Should have 2 children")

        for position in (0, 1):
            leaf = left.children[position]
            expected_leaf = stub["first_mult"][position]
            self.assertIsInstance(
                leaf, str,
                "children[{}] should be instance of str".format(position))
            self.assertEqual(
                leaf, expected_leaf,
                "Mult {} argument should be {}".format(position, expected_leaf))

        # Right operand of minus: mult(<leaf>, sum(...)).
        right = root.children[1]
        self.assertIsInstance(right, SyntaxTree, "Result should be instance of SyntaxTree")
        self.assertEqual(right.operation, "mult", "Operation should be 'mult'")
        self.assertEqual(len(right.children), 2, "Should have 2 children")

        # The first argument of the right mult is a plain string leaf.
        right_head = right.children[0]
        self.assertIsInstance(
            right_head, str,
            "children[{}] should be instance of str".format(0))
        self.assertEqual(
            right_head, stub["second_mult"][0],
            "Mult {} argument should be {}".format(0, stub["second_mult"][0]))

        # The second argument of the right mult is the nested sum(...) subtree.
        nested_sum = right.children[1]
        self.assertIsInstance(
            nested_sum, SyntaxTree,
            "children[{}] should be instance of SyntaxTree".format(1))
        self.assertEqual(nested_sum.operation, "sum", "Operation should be 'sum'")
        self.assertEqual(len(nested_sum.children), 2, "Should have 2 children")

        for position in (0, 1):
            leaf = nested_sum.children[position]
            expected_leaf = stub["second_mult"][1][position]
            self.assertIsInstance(
                leaf, str,
                "children[{}] should be instance of str".format(position))
            self.assertEqual(
                leaf, expected_leaf,
                "Sum {} argument should be {}".format(position, expected_leaf))
    def test__parse_simple_operation(self):
        """Parse a flat ``sum(...)`` call and check its operation name and three leaves.

        The expression is built from ``stub["sum"]``; each child of the
        resulting SyntaxTree must be a str equal to the matching stub entry.
        """
        config = TransformationsParserConfig(CONFIG_PATH)
        parser = TransformationsParser(config.content["processing"]["transformation"])

        expression = "sum({})".format(",".join(stub["sum"]))

        result = parser._parse(expression)
        self.assertIsInstance(result, SyntaxTree, "Result should be instance of SyntaxTree")
        self.assertEqual(result.operation, "sum", "Operation should be 'sum'")
        self.assertEqual(len(result.children), 3, "Should have 3 children")

        for index in range(3):
            child = result.children[index]
            expected = stub["sum"][index]
            # The message names the type that is actually asserted (str), in
            # line with the nested-operations test.
            self.assertIsInstance(child, str,
                                  "children[{}] should be instance of str".format(index))
            self.assertEqual(child, expected,
                             "Sum {} argument should be {}".format(index, expected))
Ejemplo n.º 7
0
    def test__parse_config_operation(self):
        """Parse a ``config(...)`` call and check it yields a one-child SyntaxTree.

        The argument comes from ``stub["config"]``; the single child of the
        resulting tree must be a str.
        """
        config = TransformationsParserConfig(CONFIG_PATH)
        parser = TransformationsParser(
            config.content["processing"]["transformation"])

        expression = "config({})".format(stub["config"])

        result = parser._parse(expression, True)

        self.assertIsInstance(result, SyntaxTree,
                              "Result should be instance of SyntaxTree")
        self.assertEqual(result.operation, "config",
                         "Operation should be 'config'")
        self.assertEqual(len(result.children), 1, "Should have 1 child")

        # The message names the type that is actually asserted (str).
        self.assertIsInstance(
            result.children[0], str,
            "children[{}] should be instance of str".format(0))
    def test_build_lambda_processor_add(self):
        """Build a lambda from ``add(...)`` transformations and apply it to the CSV data.

        Two fields use a real ``add(-13.5, 2)`` call (with and without
        spaces) and two fields are quoted string literals that merely look
        like a call, one of them containing an escaped quote. Every one of
        the five rows read from DATA_PATH is expected to map to
        ``(-11.5, -11.5, 'add(-13.5,2)', "add'(-13.5,2)")``.
        """
        # Show the full diff if the list comparison fails.
        self.maxDiff = None
        parser = TransformationsParser([
            "dst_ip: add(-13.5, 2)", "src_ip:add(-13.5,2)",
            "foobar: 'add(-13.5,2)'", "foobar2: 'add\\'(-13.5,2)'"
        ])
        parser.run()
        operations = TransformationOperations(self.config)

        transformations_validator = TransformationsValidator(
            operations, self.data_structure)
        # The return value is not used; validate() is called so that any
        # exception it raises fails the test before the lambda is built.
        transformations_validator.validate(parser.expanded_transformation)
        creator = TransformationCreator(self.data_structure,
                                        parser.expanded_transformation,
                                        TransformationOperations(self.config))

        transformation = creator.build_lambda()

        self.assertIsInstance(transformation, types.LambdaType,
                              "Transformation type should be lambda")

        expected_row = (-11.5, -11.5, 'add(-13.5,2)', "add'(-13.5,2)")

        spark = SparkSession.builder.getOrCreate()
        try:
            data_frame = spark.read.csv(DATA_PATH, self.data_structure_pyspark)
            result = data_frame.rdd.map(transformation).collect()

            self.assertListEqual(result, [expected_row] * 5,
                                 "List of tuples should be equal")
        finally:
            # Stop the session even when reading, collecting or the assertion
            # fails, so a failing test does not leave Spark running.
            spark.stop()
Ejemplo n.º 9
0
 def test__parse_transformation_types(self):
     """Check the Python type the parser assigns to each kind of literal.

     Each literal expression is parsed with ``True`` as the second
     argument to ``_parse`` and must come back as a SyntaxTree whose first
     child has the expected Python type. A bare identifier parsed the same
     way must come back as a plain str instead.
     """
     parser = TransformationsParser([])

     # (expression, expected type of the first child of the parsed tree)
     literal_cases = [
         ("1", int),
         ("1.0", float),
         ("False", bool),
         ("True", bool),
         ("'Fo,o'", str),
         ("'Foo\\'Bar'", str),
         ("'Bar'", str),
         ("4E+8", float),
         ("'Foo\\\"bar'", str),
     ]

     for expression, expected_type in literal_cases:
         parsed = parser._parse(expression, True)
         # Each message names the input and the type actually checked, so a
         # failure points at the right case.
         self.assertIsInstance(
             parsed, SyntaxTree,
             "Parsing {!r} should produce a SyntaxTree".format(expression))
         self.assertIsInstance(
             parsed.children[0], expected_type,
             "First child of parsed {!r} should be an instance of {}".format(
                 expression, expected_type.__name__))

     parsed = parser._parse("sample_rating", True)
     self.assertIsInstance(parsed, str, "Result should be an instance of str")