Beispiel #1
0
    def initialize(self):
        """Initialize SparkDfConverter.

        Checks the link settings, fills in defaults for the store and schema
        keys, normalizes the requested output format to lower case and
        pre-processes the configured process methods.

        :returns: ``StatusCode.Success``
        :raises RuntimeError: if the lower-cased output format is not in
            ``OUTPUT_FORMATS``
        """
        # validate settings: mandatory types, optional types, required values
        self.check_arg_types(read_key=str, output_format=str,
                             process_meth_args=dict, process_meth_kwargs=dict)
        self.check_arg_types(allow_none=True, store_key=str, schema_key=str)
        self.check_arg_vals('read_key')

        # coerce flags to proper booleans
        self.preserve_col_names = bool(self.preserve_col_names)
        self.fail_missing_data = bool(self.fail_missing_data)

        # store key falls back to the read key; schema key derives from the store key
        if not self.store_key:
            self.store_key = self.read_key
        if not self.schema_key:
            self.schema_key = '{}_schema'.format(self.store_key)

        # normalize and validate the output format
        out_format = self.output_format.lower()
        self.output_format = out_format
        if out_format not in OUTPUT_FORMATS:
            self.log().critical('Specified data output format "%s" is invalid', out_format)
            raise RuntimeError('invalid output format specified')

        # combine process methods with their positional and keyword arguments
        self._process_methods = process_transform_funcs(trans_funcs=self.process_methods,
                                                        func_args=self.process_meth_args,
                                                        func_kwargs=self.process_meth_kwargs)

        return StatusCode.Success
Beispiel #2
0
    def initialize(self):
        """Initialize the link.

        Checks the link settings and pre-processes the configured
        data-frame-reader methods into ``self._read_methods``.

        :returns: ``StatusCode.Success``
        """
        # validate settings: types first, then required values
        self.check_arg_types(store_key=str, read_meth_args=dict, read_meth_kwargs=dict)
        self.check_arg_vals('store_key', 'read_methods')

        # combine reader methods with their positional and keyword arguments
        reader_methods = process_transform_funcs(trans_funcs=self.read_methods,
                                                 func_args=self.read_meth_args,
                                                 func_kwargs=self.read_meth_kwargs)
        self._read_methods = reader_methods

        return StatusCode.Success
Beispiel #3
0
    def initialize(self):
        """Initialize SparkDfCreator.

        Checks the link settings, defaults the store key to the read key and
        pre-processes the configured process methods.

        :returns: ``StatusCode.Success``
        """
        # validate settings: mandatory types, optional types, required values
        self.check_arg_types(read_key=str,
                             process_meth_args=dict,
                             process_meth_kwargs=dict)
        self.check_arg_types(allow_none=True, store_key=str)
        self.check_arg_vals('read_key')

        # coerce flag to a proper boolean
        self.fail_missing_data = bool(self.fail_missing_data)

        # without an explicit store key, write back under the read key
        if not self.store_key:
            self.store_key = self.read_key

        # combine post-process methods with their positional and keyword arguments
        post_methods = process_transform_funcs(trans_funcs=self.process_methods,
                                               func_args=self.process_meth_args,
                                               func_kwargs=self.process_meth_kwargs)
        self._process_methods = post_methods

        return StatusCode.Success
Beispiel #4
0
    def initialize(self):
        """Initialize the link.

        Checks the link settings, validates the requested number of output
        files and pre-processes the configured data-frame-writer methods into
        ``self._write_methods``.

        :returns: ``StatusCode.Success``
        :raises RuntimeError: if ``num_files`` is less than 1
        """
        # check input arguments
        self.check_arg_types(read_key=str,
                             write_meth_args=dict,
                             write_meth_kwargs=dict)
        self.check_arg_vals('read_key', 'write_methods')
        self.fail_missing_data = bool(self.fail_missing_data)
        if self.num_files < 1:
            # Format with '{}' instead of '{:d}': the 'd' presentation type is
            # only valid for integers, so a non-integer value (e.g. 0.5) would
            # raise ValueError while building the message and mask the intended
            # RuntimeError.  Integer values render exactly as before.
            raise RuntimeError(
                'Requested number of files is less than 1 ({}).'.format(
                    self.num_files))

        # process data-frame-writer methods
        self._write_methods = process_transform_funcs(self.write_methods,
                                                      self.write_meth_args,
                                                      self.write_meth_kwargs)

        return StatusCode.Success
Beispiel #5
0
    def test_process_transform_funcs(self):
        """Test processing of transformation functions.

        Covers normal operation (positional and keyword call), operation
        without explicit arguments, and the errors raised for malformed
        function specifications, arguments and keyword arguments.
        """

        # create mock functions and arguments
        mock_func = mock.Mock(name='func3')
        inline_args = tuple(mock.Mock(name='arg4_{:d}'.format(idx)) for idx in range(2))
        inline_kwargs = {'key{:d}'.format(idx): mock.Mock(name='kwarg4_{:d}'.format(idx)) for idx in range(2)}
        funcs = ['func0', 'func1', 'func2', mock_func, ('func4', inline_args, inline_kwargs)]
        func_args = {
            'func1': (),
            'func2': tuple(mock.Mock(name='arg2_{:d}'.format(idx)) for idx in range(2)),
            mock_func: tuple(mock.Mock(name='arg3_{:d}'.format(idx)) for idx in range(2)),
        }
        func_kwargs = {
            'func1': {},
            'func2': {},
            mock_func: {'key{:d}'.format(idx): mock.Mock(name='kwarg3_{:d}'.format(idx)) for idx in range(2)},
        }

        # expected returned values: tuple specifications pass through unchanged,
        # bare functions are paired with their (possibly empty) arguments
        funcs_normal = []
        funcs_no_args = []
        for func in funcs:
            if isinstance(func, tuple):
                funcs_normal.append(func)
                funcs_no_args.append(func)
            else:
                funcs_normal.append((func, tuple(func_args.get(func, ())), dict(func_kwargs.get(func, {}))))
                funcs_no_args.append((func, (), {}))

        # test normal operation (1): positional arguments
        ret_funcs = process_transform_funcs(funcs, func_args, func_kwargs)
        self.assertListEqual(ret_funcs, funcs_normal, 'unexpected list of functions for normal operation (1)')

        # test normal operation (2): keyword arguments
        ret_funcs = process_transform_funcs(trans_funcs=funcs, func_args=func_args, func_kwargs=func_kwargs)
        self.assertListEqual(ret_funcs, funcs_normal, 'unexpected list of functions for normal operation (2)')

        # test operation without explicit arguments
        ret_funcs = process_transform_funcs(funcs)
        self.assertListEqual(ret_funcs, funcs_no_args, 'unexpected list of functions without explicit arguments')

        # test specifying incorrect function format
        too_short_spec = [('foo',)]
        with self.assertRaises(ValueError):
            process_transform_funcs(too_short_spec)

        # test specifying incorrect arguments
        spec_with_bad_args = [('foo', mock.Mock(name='args'), {})]
        with self.assertRaises(TypeError):
            process_transform_funcs(spec_with_bad_args)
        bad_args_for_foo = dict(foo=mock.Mock(name='args'))
        with self.assertRaises(TypeError):
            process_transform_funcs(['foo'], func_args=bad_args_for_foo)
        args_for_unknown = dict(bar=mock.Mock(name='args'))
        with self.assertRaises(ValueError):
            process_transform_funcs(['foo'], func_args=args_for_unknown)
        spec_with_args1 = [('foo', mock.Mock(name='args1'), {})]
        duplicate_args = dict(foo=mock.Mock(name='args2'))
        with self.assertRaises(RuntimeError):
            process_transform_funcs(spec_with_args1, func_args=duplicate_args)

        # test specifying incorrect keyword arguments
        spec_with_bad_kwargs = [('foo', (), mock.Mock(name='kwargs'))]
        with self.assertRaises(TypeError):
            process_transform_funcs(spec_with_bad_kwargs)
        bad_kwargs_for_foo = dict(foo=mock.Mock(name='kwargs'))
        with self.assertRaises(TypeError):
            process_transform_funcs(['foo'], func_kwargs=bad_kwargs_for_foo)
        kwargs_for_unknown = dict(bar=mock.Mock(name='kwargs'))
        with self.assertRaises(ValueError):
            process_transform_funcs(['foo'], func_kwargs=kwargs_for_unknown)
        spec_with_kwargs1 = [('foo', (), mock.Mock(name='kwargs1'))]
        duplicate_kwargs = dict(foo=mock.Mock(name='kwargs2'))
        with self.assertRaises(RuntimeError):
            process_transform_funcs(spec_with_kwargs1, func_kwargs=duplicate_kwargs)

        # test with non-callable "function"
        non_callable = type('non_callable', (), {})()
        with self.assertRaises(TypeError):
            process_transform_funcs([non_callable])