Beispiel #1
0
    def initialize(self):
        """Validate the converter settings and prepare the process methods.

        Checks the types of the configured arguments, fills in defaults for
        the store key (the read key) and the schema key
        (``'<store_key>_schema'``), lower-cases and validates the output
        format, and builds the list of process methods.

        :returns: ``StatusCode.Success`` once all settings are accepted
        :raises RuntimeError: if the lower-cased output format is not one of
            ``OUTPUT_FORMATS``
        """
        # type checks: mandatory settings first, then the ones that may be None
        self.check_arg_types(
            read_key=str,
            output_format=str,
            process_meth_args=dict,
            process_meth_kwargs=dict,
        )
        self.check_arg_types(allow_none=True, store_key=str, schema_key=str)
        self.check_arg_vals('read_key')

        # coerce the flags to real booleans
        self.preserve_col_names = bool(self.preserve_col_names)
        self.fail_missing_data = bool(self.fail_missing_data)

        # unset keys fall back to the read key and a derived schema key
        self.store_key = self.store_key or self.read_key
        self.schema_key = self.schema_key or '{}_schema'.format(self.store_key)

        # the output format is compared case-insensitively
        out_format = self.output_format.lower()
        self.output_format = out_format
        if out_format not in OUTPUT_FORMATS:
            self.logger.fatal(
                'Specified data output format "{format}" is invalid.',
                format=out_format)
            raise RuntimeError('Invalid output format specified.')

        # turn the configured methods and their arguments into process methods
        methods = self.process_methods
        meth_args = self.process_meth_args
        meth_kwargs = self.process_meth_kwargs
        self._process_methods = process_transform_funcs(
            methods, meth_args, meth_kwargs)

        return StatusCode.Success
    def initialize(self):
        """Validate the reader settings and prepare the read methods.

        Checks the types of the store key and of the read-method arguments,
        checks the values of ``store_key`` and ``read_methods``, and builds
        the list of data-frame-reader methods.

        :returns: ``StatusCode.Success``
        """
        # validate the configured arguments
        self.check_arg_types(
            store_key=str,
            read_meth_args=dict,
            read_meth_kwargs=dict,
        )
        self.check_arg_vals('store_key', 'read_methods')

        # combine the reader methods with their (keyword) arguments
        methods = self.read_methods
        meth_args = self.read_meth_args
        meth_kwargs = self.read_meth_kwargs
        self._read_methods = process_transform_funcs(
            methods, meth_args, meth_kwargs)

        return StatusCode.Success
Beispiel #3
0
    def initialize(self):
        """Validate the link settings and prepare the process methods.

        Checks the types of the configured arguments, defaults the store key
        to the read key when it is not set, and builds the list of
        post-process methods.

        :returns: ``StatusCode.Success``
        """
        # type checks: the store key is the only setting that may be None
        self.check_arg_types(
            read_key=str,
            process_meth_args=dict,
            process_meth_kwargs=dict,
        )
        self.check_arg_types(allow_none=True, store_key=str)
        self.check_arg_vals('read_key')

        # coerce the flag to a real boolean
        self.fail_missing_data = bool(self.fail_missing_data)

        # without an explicit store key the result is stored under the read key
        self.store_key = self.store_key or self.read_key

        # combine the post-process methods with their (keyword) arguments
        methods = self.process_methods
        meth_args = self.process_meth_args
        meth_kwargs = self.process_meth_kwargs
        self._process_methods = process_transform_funcs(
            methods, meth_args, meth_kwargs)

        return StatusCode.Success
    def initialize(self):
        """Initialize the link.

        Checks the types of the read key and of the write-method arguments,
        checks the values of ``read_key`` and ``write_methods``, verifies that
        at least one output file is requested, and builds the list of
        data-frame-writer methods.

        :returns: ``StatusCode.Success``
        :raises RuntimeError: if ``num_files`` is less than 1
        """
        # check input arguments
        self.check_arg_types(read_key=str,
                             write_meth_args=dict,
                             write_meth_kwargs=dict)
        self.check_arg_vals('read_key', 'write_methods')
        self.fail_missing_data = bool(self.fail_missing_data)
        if self.num_files < 1:
            # "{}" instead of "{:d}": the "d" presentation type only accepts
            # integers, so a value such as 0.5 would raise a ValueError from
            # the formatting and hide the intended RuntimeError
            raise RuntimeError(
                'Requested number of files is less than 1 ({}).'.format(
                    self.num_files))

        # process data-frame-writer methods
        self._write_methods = process_transform_funcs(self.write_methods,
                                                      self.write_meth_args,
                                                      self.write_meth_kwargs)

        return StatusCode.Success
Beispiel #5
0
    def test_process_transform_funcs(self):
        """Test processing of transformation functions.

        Covers normal operation with positional and with keyword arguments,
        operation without explicit arguments, and the errors expected for
        malformed function specifications, arguments and keyword arguments.
        """
        # mock function specifications: three given by name, one mock object
        # and one complete (function, args, kwargs) tuple
        func3 = mock.Mock(name='func3')
        func4 = (
            'func4',
            tuple(mock.Mock(name='arg4_{:d}'.format(idx)) for idx in range(2)),
            {'key{:d}'.format(idx): mock.Mock(name='kwarg4_{:d}'.format(idx))
             for idx in range(2)},
        )
        funcs = ['func0', 'func1', 'func2', func3, func4]

        # separately specified arguments and keyword arguments
        func_args = {
            'func1': (),
            'func2': tuple(
                mock.Mock(name='arg2_{:d}'.format(idx)) for idx in range(2)),
            func3: tuple(
                mock.Mock(name='arg3_{:d}'.format(idx)) for idx in range(2)),
        }
        func_kwargs = {
            'func1': {},
            'func2': {},
            func3: {
                'key{:d}'.format(idx): mock.Mock(name='kwarg3_{:d}'.format(idx))
                for idx in range(2)
            },
        }

        # expected returned values: tuples are kept as they are, every other
        # specification is expanded to a (function, args, kwargs) tuple
        funcs_normal = []
        funcs_no_args = []
        for func in funcs:
            if isinstance(func, tuple):
                funcs_normal.append(func)
                funcs_no_args.append(func)
                continue
            expected_args = tuple(func_args.get(func, ()))
            expected_kwargs = dict(func_kwargs.get(func, {}))
            funcs_normal.append((func, expected_args, expected_kwargs))
            funcs_no_args.append((func, (), {}))

        # test normal operation (1): positional arguments
        result = process_transform_funcs(funcs, func_args, func_kwargs)
        self.assertListEqual(
            result, funcs_normal,
            'unexpected list of functions for normal operation (1)')

        # test normal operation (2): keyword arguments
        result = process_transform_funcs(
            trans_funcs=funcs, func_args=func_args, func_kwargs=func_kwargs)
        self.assertListEqual(
            result, funcs_normal,
            'unexpected list of functions for normal operation (2)')

        # test operation without explicit arguments
        result = process_transform_funcs(funcs)
        self.assertListEqual(
            result, funcs_no_args,
            'unexpected list of functions without explicit arguments')

        # test specifying incorrect function format
        self.assertRaises(ValueError, process_transform_funcs, [('foo', )])

        # test specifying incorrect arguments
        self.assertRaises(
            TypeError, process_transform_funcs,
            [('foo', mock.Mock(name='args'), {})])
        self.assertRaises(
            TypeError, process_transform_funcs,
            ['foo'], func_args=dict(foo=mock.Mock(name='args')))
        self.assertRaises(
            ValueError, process_transform_funcs,
            ['foo'], func_args=dict(bar=mock.Mock(name='args')))
        self.assertRaises(
            RuntimeError, process_transform_funcs,
            [('foo', mock.Mock(name='args1'), {})],
            func_args=dict(foo=mock.Mock(name='args2')))

        # test specifying incorrect keyword arguments
        self.assertRaises(
            TypeError, process_transform_funcs,
            [('foo', (), mock.Mock(name='kwargs'))])
        self.assertRaises(
            TypeError, process_transform_funcs,
            ['foo'], func_kwargs=dict(foo=mock.Mock(name='kwargs')))
        self.assertRaises(
            ValueError, process_transform_funcs,
            ['foo'], func_kwargs=dict(bar=mock.Mock(name='kwargs')))
        self.assertRaises(
            RuntimeError, process_transform_funcs,
            [('foo', (), mock.Mock(name='kwargs1'))],
            func_kwargs=dict(foo=mock.Mock(name='kwargs2')))

        # test with non-callable "function": an instance of an empty class
        non_callable = type('non_callable', (), {})()
        self.assertRaises(TypeError, process_transform_funcs, [non_callable])