コード例 #1
0
    def test_build_lambda_processor_config(self):
        """Build a lambda from a ``config(...)`` transformation and run it.

        The transformation ``a: config('input.options.port')`` is parsed,
        validated and compiled; mapping the resulting lambda over the sample
        CSV must yield ``(29092, )`` for each of the five collected rows.
        """
        parser = TransformationsParser(["a: config('input.options.port')"])
        parser.run()
        expanded = parser.expanded_transformation

        operations = TransformationOperations(self.config)
        validator = TransformationsValidator(operations, self.data_structure)
        # The return value is unused; presumably validate() raises on an
        # invalid transformation -- confirm against TransformationsValidator.
        validator.validate(expanded)

        creator = TransformationCreator(self.data_structure, expanded,
                                        TransformationOperations(self.config))
        transformation = creator.build_lambda()

        self.assertIsInstance(transformation, types.LambdaType,
                              "Transformation type should be lambda")

        spark = SparkSession.builder.getOrCreate()
        try:
            frame = spark.read.csv(DATA_PATH, self.data_structure_pyspark)
            result = frame.rdd.map(transformation).collect()

            self.assertListEqual(result, [(29092, )] * 5,
                                 "List of tuples should be equal")
        finally:
            # Stop the session even when a Spark call or assertion fails.
            spark.stop()
コード例 #2
0
    def test_build_lambda_numbers(self):
        """Build a lambda from a hand-made tree holding a numeric literal.

        A ``SyntaxTree`` with operation ``"_"`` and the single child ``13``
        is validated and compiled; every collected row must be ``(13, )``.
        """
        tree = SyntaxTree()
        tree.operation = "_"
        tree.children = [13]  # as if it parsed

        parsed_transformations = [FieldTransformation("a", tree)]

        operations = TransformationOperations(self.config)
        validator = TransformationsValidator(operations, self.data_structure)
        # The return value is unused; presumably validate() raises on an
        # invalid transformation -- confirm against TransformationsValidator.
        validator.validate(parsed_transformations)

        creator = TransformationCreator(self.data_structure,
                                        parsed_transformations,
                                        TransformationOperations(self.config))
        transformation = creator.build_lambda()

        self.assertIsInstance(transformation, types.LambdaType,
                              "Transformation type should be lambda")

        spark = SparkSession.builder.getOrCreate()
        try:
            frame = spark.read.csv(DATA_PATH, self.data_structure_pyspark)
            result = frame.rdd.map(transformation).collect()

            self.assertListEqual(result, [(13, )] * 5,
                                 "List of tuples should be equal")
        finally:
            # Stop the session even when a Spark call or assertion fails.
            spark.stop()
コード例 #3
0
    def test_build_lambda_add_scientific(self):
        """Build a lambda for ``add`` with a scientific-notation operand.

        The tree adds ``1.2E+5`` and ``1.0``; every collected row must be
        ``(120001.0, )``.
        """
        tree = SyntaxTree()
        tree.operation = "add"
        tree.children = [1.2E+5, 1.0]

        parsed_transformations = [FieldTransformation("sum", tree)]
        creator = TransformationCreator(self.data_structure,
                                        parsed_transformations,
                                        TransformationOperations(self.config))
        transformation = creator.build_lambda()

        self.assertIsInstance(transformation, types.LambdaType,
                              "Transformation type should be lambda")

        spark = SparkSession.builder.getOrCreate()
        try:
            frame = spark.read.csv(DATA_PATH, self.data_structure_pyspark)
            result = frame.rdd.map(transformation).collect()

            self.assertListEqual(result, [(120001.0, )] * 5,
                                 "List of tuples should be equal")
        finally:
            # Stop the session even when a Spark call or assertion fails.
            spark.stop()
コード例 #4
0
    def test_build_lambda_truncate(self):
        """Build a lambda for ``truncate`` applied to a string literal.

        The tree truncates the quoted literal ``'test'`` with the argument
        ``2``; every collected row must be ``('te', )``.
        """
        tree = SyntaxTree()
        tree.operation = "truncate"
        tree.children = ["'test'", 2]

        parsed_transformations = [
            FieldTransformation("cut_upto_2_symbols", tree)
        ]
        creator = TransformationCreator(self.data_structure,
                                        parsed_transformations,
                                        TransformationOperations(self.config))
        transformation = creator.build_lambda()

        self.assertIsInstance(transformation, types.LambdaType,
                              "Transformation type should be lambda")

        spark = SparkSession.builder.getOrCreate()
        try:
            frame = spark.read.csv(DATA_PATH, self.data_structure_pyspark)
            result = frame.rdd.map(transformation).collect()

            self.assertListEqual(result, [('te', )] * 5,
                                 "List of tuples should be equal")
        finally:
            # Stop the session even when a Spark call or assertion fails.
            spark.stop()
コード例 #5
0
    def test_build_lambda_with_nested_literals(self):
        """Build a lambda for a ``concat`` nested inside another ``concat``.

        The inner tree concatenates the literal ``'6'`` with the
        ``packet_size`` field; the outer tree concatenates ``2E+2`` with the
        inner result. The expected strings show the float rendered as
        ``200.0`` followed by ``6`` and the row's packet size.
        """
        inner = SyntaxTree()
        inner.operation = "concat"
        # should cast int to str and concat
        inner.children = ["'6'", "packet_size"]  # packet_size [74, 68]

        outer = SyntaxTree()
        outer.operation = "concat"
        outer.children = [2E+2, inner]

        parsed_transformations = [FieldTransformation("nested", outer)]
        creator = TransformationCreator(self.data_structure,
                                        parsed_transformations,
                                        TransformationOperations(self.config))
        transformation = creator.build_lambda()

        self.assertIsInstance(transformation, types.LambdaType,
                              "Transformation type should be lambda")

        expected = [('200.0674', ), ('200.0668', ), ('200.061510', ),
                    ('200.06185', ), ('200.06185', )]

        spark = SparkSession.builder.getOrCreate()
        try:
            frame = spark.read.csv(DATA_PATH, self.data_structure_pyspark)
            result = frame.rdd.map(transformation).collect()

            self.assertListEqual(result, expected,
                                 "List of tuples should be equal")
        finally:
            # Stop the session even when a Spark call or assertion fails.
            spark.stop()
コード例 #6
0
    def test_build_lambda_with_literals(self):
        """Build a lambda for ``concat`` of a string literal and a field.

        The tree concatenates the literal ``'6 - '`` with the ``packet_size``
        field; each collected row must be that prefix followed by the row's
        packet size.
        """
        tree = SyntaxTree()
        tree.operation = "concat"
        tree.children = ["'6 - '", "packet_size"]  # packet_size [74, 68]

        parsed_transformations = [FieldTransformation("ephemer", tree)]

        creator = TransformationCreator(self.data_structure,
                                        parsed_transformations,
                                        TransformationOperations(self.config))
        transformation = creator.build_lambda()

        self.assertIsInstance(transformation, types.LambdaType,
                              "Transformation type should be lambda")

        expected = [("6 - 74", ), ("6 - 68", ), ("6 - 1510", ),
                    ("6 - 185", ), ("6 - 185", )]

        spark = SparkSession.builder.getOrCreate()
        try:
            frame = spark.read.csv(DATA_PATH, self.data_structure_pyspark)
            result = frame.rdd.map(transformation).collect()

            self.assertListEqual(result, expected,
                                 "List of tuples should be equal")
        finally:
            # The session was previously never stopped in this test; release
            # it on both the success and the failure path.
            spark.stop()

    def test_build_lambda_with_nested_operations(self):
        """Build a lambda mixing plain, renamed and computed fields.

        The transformation list holds the bare field name ``src_ip``, a
        rename of ``dst_ip`` to ``destination_ip``, and ``traffic`` defined
        as ``mult(mult(packet_size, sampling_rate), "10")``. Each collected
        row must be a ``(src_ip, dst_ip, traffic)`` tuple.
        """
        inner_mult = SyntaxTree()
        inner_mult.operation = "mult"
        inner_mult.children = ["packet_size", "sampling_rate"]

        outer_mult = SyntaxTree()
        outer_mult.operation = "mult"
        # The factor is passed as the string "10", exactly as written here;
        # the expected integers below show it is used as a number.
        outer_mult.children = [inner_mult, "10"]

        parsed_transformations = [
            "src_ip",
            FieldTransformation("destination_ip", "dst_ip"),
            FieldTransformation("traffic", outer_mult)
        ]

        # This test passes an explicit dict of GeoLite2 database paths
        # instead of self.config.
        geo_config = {
            "country": "./GeoLite2-Country.mmdb",
            "city": "./GeoLite2-City.mmdb",
            "asn": "./GeoLite2-ASN.mmdb"
        }
        creator = TransformationCreator(self.data_structure,
                                        parsed_transformations,
                                        TransformationOperations(geo_config))
        transformation = creator.build_lambda()

        self.assertIsInstance(transformation, types.LambdaType,
                              "Transformation type should be lambda")

        expected = [("217.69.143.60", "91.221.61.183", 378880),
                    ("91.221.61.168", "90.188.114.141", 348160),
                    ("91.226.13.80", "5.136.78.36", 7731200),
                    ("192.168.30.2", "192.168.30.1", 947200),
                    ("192.168.30.2", "192.168.30.1", 947200)]

        spark = SparkSession.builder.getOrCreate()
        try:
            frame = spark.read.csv(DATA_PATH, self.data_structure_pyspark)
            result = frame.rdd.map(transformation).collect()

            self.assertListEqual(result, expected,
                                 "List of tuples should be equal")
        finally:
            # Stop the session even when a Spark call or assertion fails.
            spark.stop()
コード例 #8
0
    def test_build_lambda_processor_add(self):
        """Build a lambda from parsed ``add`` calls and look-alike strings.

        Four transformations are parsed: two ``add(-13.5, 2)`` calls written
        with different spacing, and two quoted strings that merely contain
        the text of an ``add`` call (one with an escaped quote). The calls
        must evaluate to ``-11.5`` while the quoted strings must come through
        as plain text.
        """
        self.maxDiff = None
        parser = TransformationsParser([
            "dst_ip: add(-13.5, 2)", "src_ip:add(-13.5,2)",
            "foobar: 'add(-13.5,2)'", "foobar2: 'add\\'(-13.5,2)'"
        ])
        parser.run()
        expanded = parser.expanded_transformation

        operations = TransformationOperations(self.config)
        validator = TransformationsValidator(operations, self.data_structure)
        # The return value is unused; presumably validate() raises on an
        # invalid transformation -- confirm against TransformationsValidator.
        validator.validate(expanded)

        creator = TransformationCreator(self.data_structure, expanded,
                                        TransformationOperations(self.config))
        transformation = creator.build_lambda()

        self.assertIsInstance(transformation, types.LambdaType,
                              "Transformation type should be lambda")

        expected_row = (-11.5, -11.5, 'add(-13.5,2)', "add'(-13.5,2)")

        spark = SparkSession.builder.getOrCreate()
        try:
            frame = spark.read.csv(DATA_PATH, self.data_structure_pyspark)
            result = frame.rdd.map(transformation).collect()

            self.assertListEqual(result, [expected_row] * 5,
                                 "List of tuples should be equal")
        finally:
            # Stop the session even when a Spark call or assertion fails.
            spark.stop()
コード例 #9
0
    def test_build_lambda_concat_with_nested_mul(self):
        """Build a lambda for ``concat`` whose first operand is a ``mul``.

        The inner tree multiplies ``6`` by the ``packet_size`` field; the
        root tree concatenates that product with a quoted literal that itself
        contains a quote and the text ``mul(3,3)``. Each collected row must
        be the product followed by the literal's text.
        """
        product_tree = SyntaxTree()
        product_tree.operation = "mul"
        product_tree.children = [6, "packet_size"]

        concat_tree = SyntaxTree()
        concat_tree.operation = "concat"
        concat_tree.children = [
            product_tree, "' -- xe \' 2/3 mul(3,3) FooBar'"
        ]

        parsed_transformations = [
            FieldTransformation("traffic", concat_tree)
        ]
        creator = TransformationCreator(self.data_structure,
                                        parsed_transformations,
                                        TransformationOperations(self.config))
        transformation = creator.build_lambda()

        self.assertIsInstance(transformation, types.LambdaType,
                              "Transformation type should be lambda")

        expected = [('444 -- xe \' 2/3 mul(3,3) FooBar', ),
                    ('408 -- xe \' 2/3 mul(3,3) FooBar', ),
                    ('9060 -- xe \' 2/3 mul(3,3) FooBar', ),
                    ('1110 -- xe \' 2/3 mul(3,3) FooBar', ),
                    ('1110 -- xe \' 2/3 mul(3,3) FooBar', )]

        spark = SparkSession.builder.getOrCreate()
        try:
            frame = spark.read.csv(DATA_PATH, self.data_structure_pyspark)
            result = frame.rdd.map(transformation).collect()

            self.assertListEqual(result, expected,
                                 "List of tuples should be equal")
        finally:
            # Stop the session even when a Spark call or assertion fails.
            spark.stop()