def test_validate_raise_field_not_exists_when_rename_field(self):
    validator = TransformationsValidator(
        TransformationOperations(CONFIG_PATH),
        self.data_structure_pyspark)
    with self.assertRaises(errors.FieldNotExists):
        validator.validate(
            [FieldTransformation("size", "not_exists_field"), "dst_ip"])

def test_validate_raise_field_not_exists_error(self):
    validator = TransformationsValidator(
        TransformationOperations(CONFIG_PATH),
        self.data_structure_pyspark)
    with self.assertRaises(errors.FieldNotExists):
        validator.validate(
            ["src_ip", "dst_ip", "packet_size", "sample_rate"])

def test_build_lambda_processor_config(self):
    """config() should resolve from the loaded config for every row."""
    parser = TransformationsParser(["a: config('input.options.port')"])
    parser.run()

    operations = TransformationOperations(self.config)

    transformations_validator = TransformationsValidator(
        operations, self.data_structure)
    _ = transformations_validator.validate(parser.expanded_transformation)

    creator = TransformationCreator(self.data_structure,
                                    parser.expanded_transformation,
                                    TransformationOperations(self.config))
    transformation = creator.build_lambda()

    self.assertIsInstance(transformation, types.LambdaType,
                          "Transformation type should be lambda")

    spark = SparkSession.builder.getOrCreate()
    file = spark.read.csv(DATA_PATH, self.data_structure_pyspark)
    result = file.rdd.map(transformation)
    result = result.collect()

    self.assertListEqual(
        result,
        [(29092, ), (29092, ), (29092, ), (29092, ), (29092, )],
        "List of tuples should be equal")
    spark.stop()

def test_build_lambda_numbers(self):
    """A bare numeric literal is passed through unchanged for every row."""
    st = SyntaxTree()
    st.operation = "_"
    st.children = [13]  # as if already parsed

    parsed_transformations = [FieldTransformation("a", st)]

    operations = TransformationOperations(self.config)

    transformations_validator = TransformationsValidator(
        operations, self.data_structure)
    _ = transformations_validator.validate(parsed_transformations)

    creator = TransformationCreator(self.data_structure,
                                    parsed_transformations,
                                    TransformationOperations(self.config))
    transformation = creator.build_lambda()

    self.assertIsInstance(transformation, types.LambdaType,
                          "Transformation type should be lambda")

    spark = SparkSession.builder.getOrCreate()
    file = spark.read.csv(DATA_PATH, self.data_structure_pyspark)
    result = file.rdd.map(transformation)
    result = result.collect()

    self.assertListEqual(result,
                         [(13, ), (13, ), (13, ), (13, ), (13, )],
                         "List of tuples should be equal")
    spark.stop()

def test_validate_raise_operation_not_supported_error(self):
    validator = TransformationsValidator(
        TransformationOperations(CONFIG_PATH),
        self.data_structure_pyspark)
    syntaxtree = SyntaxTree()
    syntaxtree.operation = "not_exists_operation"
    with self.assertRaises(errors.OperationNotSupportedError):
        validator.validate(
            [FieldTransformation("size", syntaxtree), "dst_ip"])

def test_validate_raise_incorrect_argument_type_for_operation_error(self):
    validator = TransformationsValidator(
        TransformationOperations(CONFIG_PATH),
        self.data_structure_pyspark)
    syntaxtree = SyntaxTree()
    syntaxtree.operation = "mul"
    syntaxtree.children = ["src_ip", "packet_size"]
    with self.assertRaises(errors.IncorrectArgumentTypeForOperationError):
        validator.validate(
            [FieldTransformation("traffic", syntaxtree), "dst_ip"])

def test_validate_correct_arguments_amount_for_operation_add(self):
    validator = TransformationsValidator(
        TransformationOperations(CONFIG_PATH),
        self.data_structure_pyspark)
    syntaxtree = SyntaxTree()
    syntaxtree.operation = "add"
    syntaxtree.children = [1, 2]
    fields = validator.validate([FieldTransformation("add", syntaxtree)])
    self.assertEqual(
        fields,
        types.StructType([types.StructField("add", types.LongType())]))

def test_validate_raise_error_for_function_with_different_arguments_type(
        self):
    validator = TransformationsValidator(
        TransformationOperations(CONFIG_PATH),
        self.data_structure_pyspark)
    main_syntax_tree = SyntaxTree()
    main_syntax_tree.operation = "truncate"
    main_syntax_tree.children = ["src_ip", "dst_ip"]
    with self.assertRaises(errors.IncorrectArgumentTypeForOperationError):
        validator.validate(
            [FieldTransformation("result", main_syntax_tree)])

def test_validate_rename_field(self):
    validator = TransformationsValidator(
        TransformationOperations(CONFIG_PATH),
        self.data_structure_pyspark)
    fields = validator.validate(
        [FieldTransformation("size", "packet_size"), "dst_ip"])
    self.assertEqual(
        fields,
        types.StructType([
            types.StructField('size', types.LongType()),
            types.StructField('dst_ip', types.StringType())
        ]))

def test_validate_raise_incorrect_arguments_amount_for_operation_error(
        self):
    validator = TransformationsValidator(
        TransformationOperations(CONFIG_PATH),
        self.data_structure_pyspark)
    syntaxtree = SyntaxTree()
    syntaxtree.operation = "add"
    syntaxtree.children = [1, 2, 3]
    with self.assertRaises(
            errors.IncorrectArgumentsAmountForOperationError):
        validator.validate(
            [FieldTransformation("add", syntaxtree), "dst_ip"])

def test_validate_work_success(self):
    validator = TransformationsValidator(
        TransformationOperations(CONFIG_PATH),
        self.data_structure_pyspark)
    fields = validator.validate(
        ["src_ip", "dst_ip", "packet_size", "sampling_rate"])
    self.assertEqual(
        fields,
        types.StructType([
            types.StructField('src_ip', types.StringType()),
            types.StructField('dst_ip', types.StringType()),
            types.StructField('packet_size', types.LongType()),
            types.StructField('sampling_rate', types.LongType())
        ]), 'StructType should be equal')

def test_validate_function_with_different_arguments_type(self):
    validator = TransformationsValidator(
        TransformationOperations(CONFIG_PATH),
        self.data_structure_pyspark)
    tree = SyntaxTree()
    tree.operation = "truncate"
    tree.children = ["src_ip", 5]
    fields = validator.validate([FieldTransformation("result", tree)])
    self.assertEqual(
        fields,
        types.StructType([types.StructField("result", types.StringType())]))

def test_validate_raise_operation_not_supported_error_for_subtree(self):
    validator = TransformationsValidator(
        TransformationOperations(CONFIG_PATH),
        self.data_structure_pyspark)
    syntaxtree = SyntaxTree()
    syntaxtree.operation = "not_exists_operator"
    syntaxtree.children = ["1", "2"]

    main_syntax_tree = SyntaxTree()
    main_syntax_tree.operation = "mul"
    main_syntax_tree.children = [syntaxtree, "1"]

    with self.assertRaises(errors.OperationNotSupportedError):
        validator.validate(
            [FieldTransformation("result", main_syntax_tree), "dst_ip"])

def test_validate_with_correct_one_level_subtree(self):
    validator = TransformationsValidator(
        TransformationOperations(CONFIG_PATH),
        self.data_structure_pyspark)
    syntaxtree = SyntaxTree()
    syntaxtree.operation = "mul"
    syntaxtree.children = ["packet_size", "sampling_rate"]
    fields = validator.validate(
        [FieldTransformation("traffic", syntaxtree), "dst_ip"])
    self.assertEqual(
        fields,
        types.StructType([
            types.StructField('traffic', types.LongType()),
            types.StructField('dst_ip', types.StringType())
        ]))

def test_validate_config_operation(self):
    validator = TransformationsValidator(
        TransformationOperations(CONFIG_PATH),
        self.data_structure_pyspark)
    syntaxtree = SyntaxTree()
    syntaxtree.operation = "config"
    syntaxtree.children = ["'input.options.port'"]

    main_syntax_tree = SyntaxTree()
    main_syntax_tree.operation = "concat"
    main_syntax_tree.children = [syntaxtree, "'sampling_rate'"]

    fields = validator.validate(
        [FieldTransformation("result", main_syntax_tree)])
    self.assertEqual(
        fields,
        types.StructType([types.StructField('result', types.StringType())]))

def test_build_lambda_processor_add(self):
    """add() calls are evaluated; quoted strings stay literal."""
    self.maxDiff = None
    parser = TransformationsParser([
        "dst_ip: add(-13.5, 2)", "src_ip:add(-13.5,2)",
        "foobar: 'add(-13.5,2)'", "foobar2: 'add\\'(-13.5,2)'"
    ])
    parser.run()

    operations = TransformationOperations(self.config)

    transformations_validator = TransformationsValidator(
        operations, self.data_structure)
    _ = transformations_validator.validate(parser.expanded_transformation)

    creator = TransformationCreator(self.data_structure,
                                    parser.expanded_transformation,
                                    TransformationOperations(self.config))
    transformation = creator.build_lambda()

    self.assertIsInstance(transformation, types.LambdaType,
                          "Transformation type should be lambda")

    spark = SparkSession.builder.getOrCreate()
    file = spark.read.csv(DATA_PATH, self.data_structure_pyspark)
    result = file.rdd.map(transformation)
    result = result.collect()

    self.assertListEqual(
        result,
        [(-11.5, -11.5, 'add(-13.5,2)', "add'(-13.5,2)"),
         (-11.5, -11.5, 'add(-13.5,2)', "add'(-13.5,2)"),
         (-11.5, -11.5, 'add(-13.5,2)', "add'(-13.5,2)"),
         (-11.5, -11.5, 'add(-13.5,2)', "add'(-13.5,2)"),
         (-11.5, -11.5, 'add(-13.5,2)', "add'(-13.5,2)")],
        "List of tuples should be equal")
    spark.stop()

def test_validate_with_transformation_primitives(self):
    """Primitive concat arguments all validate to a StringType field."""
    validator = TransformationsValidator(
        TransformationOperations(CONFIG_PATH),
        self.data_structure_pyspark)

    # test "{foo: 'bar'}"
    tree = SyntaxTree()
    tree.operation = "concat"

    tree.children = [1, 2]
    fields = validator.validate([FieldTransformation("foo", tree)])
    self.assertEqual(
        fields,
        types.StructType([types.StructField("foo", types.StringType())]))

    tree.children = ["'1'", "'2'"]
    fields = validator.validate([FieldTransformation("foo", tree)])
    self.assertEqual(
        fields,
        types.StructType([types.StructField("foo", types.StringType())]))

    tree.children = [1E+2, "'1'"]
    fields = validator.validate([FieldTransformation("foo", tree)])
    self.assertEqual(
        fields,
        types.StructType([types.StructField("foo", types.StringType())]))

    tree.children = ["'foo\'bar'", "'2'"]
    fields = validator.validate([FieldTransformation("foo", tree)])
    self.assertEqual(
        fields,
        types.StructType([types.StructField("foo", types.StringType())]))

    tree.children = ["'foo\"bar'", 2]
    fields = validator.validate([FieldTransformation("foo", tree)])
    self.assertEqual(
        fields,
        types.StructType([types.StructField("foo", types.StringType())]))