def initialize(self):
    """Initialize SparkDfConverter.

    Runs the argument checks, coerces the boolean flags, fills in the
    storage and schema keys when they are not set, checks the lower-cased
    output format against ``OUTPUT_FORMATS`` and stores the result of
    ``process_transform_funcs`` in ``self._process_methods``.

    :returns: ``StatusCode.Success``
    :raises RuntimeError: if the output format is not in ``OUTPUT_FORMATS``
    """
    # arguments with a fixed type first, then those checked with allow_none
    typed_args = dict(read_key=str, output_format=str,
                      process_meth_args=dict, process_meth_kwargs=dict)
    self.check_arg_types(**typed_args)
    self.check_arg_types(allow_none=True, store_key=str, schema_key=str)
    self.check_arg_vals('read_key')

    # turn the flags into real booleans
    for flag in ('preserve_col_names', 'fail_missing_data'):
        setattr(self, flag, bool(getattr(self, flag)))

    # default keys: store under the read key, schema key derived from the store key
    if not self.store_key:
        self.store_key = self.read_key
    if not self.schema_key:
        self.schema_key = '{}_schema'.format(self.store_key)

    # the output format is lower-cased before it is looked up
    out_format = self.output_format.lower()
    self.output_format = out_format
    if out_format not in OUTPUT_FORMATS:
        self.log().critical('Specified data output format "%s" is invalid', out_format)
        raise RuntimeError('invalid output format specified')

    # build the list of processing methods
    methods = self.process_methods
    meth_args = self.process_meth_args
    meth_kwargs = self.process_meth_kwargs
    self._process_methods = process_transform_funcs(methods, meth_args, meth_kwargs)

    return StatusCode.Success
def initialize(self):
    """Initialize the link.

    Runs the argument checks and stores the result of
    ``process_transform_funcs`` for the data-frame-reader methods in
    ``self._read_methods``.

    :returns: ``StatusCode.Success``
    """
    # argument checks
    typed_args = dict(store_key=str, read_meth_args=dict, read_meth_kwargs=dict)
    self.check_arg_types(**typed_args)
    checked_vals = ('store_key', 'read_methods')
    self.check_arg_vals(*checked_vals)

    # build the list of data-frame-reader methods
    methods = self.read_methods
    meth_args = self.read_meth_args
    meth_kwargs = self.read_meth_kwargs
    self._read_methods = process_transform_funcs(methods, meth_args, meth_kwargs)

    return StatusCode.Success
def initialize(self):
    """Initialize SparkDfCreator.

    Runs the argument checks, coerces ``fail_missing_data`` to a boolean,
    falls back to ``read_key`` when ``store_key`` is not set and stores the
    result of ``process_transform_funcs`` in ``self._process_methods``.

    :returns: ``StatusCode.Success``
    """
    # arguments with a fixed type first, then the one checked with allow_none
    typed_args = dict(read_key=str, process_meth_args=dict, process_meth_kwargs=dict)
    self.check_arg_types(**typed_args)
    self.check_arg_types(allow_none=True, store_key=str)
    self.check_arg_vals('read_key')

    self.fail_missing_data = bool(self.fail_missing_data)

    # without an explicit store key the read key is reused
    if not self.store_key:
        self.store_key = self.read_key

    # build the list of post-process methods
    methods = self.process_methods
    meth_args = self.process_meth_args
    meth_kwargs = self.process_meth_kwargs
    self._process_methods = process_transform_funcs(methods, meth_args, meth_kwargs)

    return StatusCode.Success
def initialize(self):
    """Initialize the link.

    Runs the argument checks, coerces ``fail_missing_data`` to a boolean,
    rejects a requested number of files below 1 and stores the result of
    ``process_transform_funcs`` for the data-frame-writer methods in
    ``self._write_methods``.

    :returns: ``StatusCode.Success``
    :raises RuntimeError: if ``num_files`` is less than 1
    """
    # check input arguments
    self.check_arg_types(read_key=str, write_meth_args=dict, write_meth_kwargs=dict)
    self.check_arg_vals('read_key', 'write_methods')
    self.fail_missing_data = bool(self.fail_missing_data)

    if self.num_files < 1:
        # "{}" rather than "{:d}": num_files is not type-checked above, and
        # "{:d}" raises ValueError for non-integer values such as 0.5, which
        # would replace the intended RuntimeError; for ints the text is the same
        raise RuntimeError(
            'Requested number of files is less than 1 ({}).'.format(self.num_files))

    # process data-frame-writer methods
    self._write_methods = process_transform_funcs(
        self.write_methods, self.write_meth_args, self.write_meth_kwargs)

    return StatusCode.Success
def test_process_transform_funcs(self):
    """Test processing transformation functions.

    Covers the normal call forms (positional, keyword, without explicit
    arguments) and the exceptions expected for malformed specifications.
    """
    # mock function and a complete (function, args, kwargs) specification
    func3 = mock.Mock(name='func3')
    args4 = tuple(mock.Mock(name='arg4_{:d}'.format(it)) for it in range(2))
    kwargs4 = {'key{:d}'.format(it): mock.Mock(name='kwarg4_{:d}'.format(it)) for it in range(2)}
    funcs = ['func0', 'func1', 'func2', func3, ('func4', args4, kwargs4)]

    # separately specified arguments and keyword arguments
    func_args = {
        'func1': (),
        'func2': tuple(mock.Mock(name='arg2_{:d}'.format(it)) for it in range(2)),
        func3: tuple(mock.Mock(name='arg3_{:d}'.format(it)) for it in range(2)),
    }
    func_kwargs = {
        'func1': {},
        'func2': {},
        func3: {'key{:d}'.format(it): mock.Mock(name='kwarg3_{:d}'.format(it)) for it in range(2)},
    }

    # expected returned values: tuple specifications are passed through as is
    funcs_normal = []
    funcs_no_args = []
    for func in funcs:
        if isinstance(func, tuple):
            funcs_normal.append(func)
            funcs_no_args.append(func)
            continue
        funcs_normal.append((func, tuple(func_args.get(func, ())), dict(func_kwargs.get(func, {}))))
        funcs_no_args.append((func, (), {}))

    # test normal operation (1)
    self.assertListEqual(process_transform_funcs(funcs, func_args, func_kwargs), funcs_normal,
                         'unexpected list of functions for normal operation (1)')

    # test normal operation (2)
    self.assertListEqual(
        process_transform_funcs(trans_funcs=funcs, func_args=func_args, func_kwargs=func_kwargs),
        funcs_normal, 'unexpected list of functions for normal operation (2)')

    # test operation without explicit arguments
    self.assertListEqual(process_transform_funcs(funcs), funcs_no_args,
                         'unexpected list of functions without explicit arguments')

    # test specifying incorrect function format
    self.assertRaises(ValueError, process_transform_funcs, [('foo',)])

    # test specifying incorrect arguments
    self.assertRaises(TypeError, process_transform_funcs, [('foo', mock.Mock(name='args'), {})])
    self.assertRaises(TypeError, process_transform_funcs, ['foo'],
                      func_args=dict(foo=mock.Mock(name='args')))
    self.assertRaises(ValueError, process_transform_funcs, ['foo'],
                      func_args=dict(bar=mock.Mock(name='args')))
    self.assertRaises(RuntimeError, process_transform_funcs, [('foo', mock.Mock(name='args1'), {})],
                      func_args=dict(foo=mock.Mock(name='args2')))

    # test specifying incorrect keyword arguments
    self.assertRaises(TypeError, process_transform_funcs, [('foo', (), mock.Mock(name='kwargs'))])
    self.assertRaises(TypeError, process_transform_funcs, ['foo'],
                      func_kwargs=dict(foo=mock.Mock(name='kwargs')))
    self.assertRaises(ValueError, process_transform_funcs, ['foo'],
                      func_kwargs=dict(bar=mock.Mock(name='kwargs')))
    self.assertRaises(RuntimeError, process_transform_funcs, [('foo', (), mock.Mock(name='kwargs1'))],
                      func_kwargs=dict(foo=mock.Mock(name='kwargs2')))

    # test with non-callable "function"
    non_callable = type('non_callable', (), {})()
    self.assertRaises(TypeError, process_transform_funcs, [non_callable])