コード例 #1
0
ファイル: io_test.py プロジェクト: hartym/rdc.etl
    def test_multiple_input(self):
        """Feed an InputMultiplexer through two channels and check what comes out, in order."""
        mux = InputMultiplexer([CH1, CH2])

        # Right after creation, writing a value to either channel is rejected.
        for name in (CH1, CH2):
            self.assertRaises(InactiveWritableError, mux[name].put, 'foo')

        # Send Begin on the first channel, then queue two values on it.
        for item in (Begin, 'foo', 'foo-oo'):
            mux[CH1].put(item)

        # A single read returns the first value together with its channel.
        self.assertEqual(mux.get(), ('foo', CH1))

        mux[CH1].put(End)

        # Open, fill and close the second channel.
        for item in (Begin, 'bar', 'ba-ar', End):
            mux[CH2].put(item)

        # Remaining values are expected in this exact order.
        for expected in (('foo-oo', CH1), ('bar', CH2), ('ba-ar', CH2)):
            self.assertEqual(mux.get(), expected)

        # With both channels ended and emptied, reading must fail.
        self.assertRaises(InactiveReadableError, mux.get)
コード例 #2
0
    def __init__(self, transform=None, input_channels=None, output_channels=None):
        """Prepare the name, channels, multiplexers, counters and lifecycle flags.

        :param transform: optional callable stored as ``self.transform``; when it is truthy and
            no name is set yet, its ``__name__`` becomes the instance name.
        :param input_channels: optional replacement for ``INPUT_CHANNELS`` (used when truthy).
        :param output_channels: optional replacement for ``OUTPUT_CHANNELS`` (used when truthy).
        """
        # Borrow the name of the given callable unless a name already exists.
        if transform:
            if not self._name:
                self._name = transform.__name__

        # A falsy argument keeps whatever value the attribute lookup already yields.
        self.INPUT_CHANNELS = input_channels if input_channels else self.INPUT_CHANNELS
        self.OUTPUT_CHANNELS = output_channels if output_channels else self.OUTPUT_CHANNELS

        self._input = InputMultiplexer(self.INPUT_CHANNELS)
        self._output = OutputDemultiplexer(self.OUTPUT_CHANNELS)

        # Execution counters start at zero.
        self._exec_time, self._exec_count = 0.0, 0

        # Nothing has happened yet in the lifecycle.
        self._booted = self._initialized = self._finalized = False

        self.transform = transform if transform else self.transform
コード例 #3
0
ファイル: __init__.py プロジェクト: fabienpomerol/etl
    def __init__(self, transform=None, input_channels=None, output_channels=None):
        """Store the transform callable and set up channels, counters, flags and name.

        :param transform: optional callable stored as ``self.transform`` (used when truthy).
        :param input_channels: optional replacement for ``INPUT_CHANNELS`` (used when truthy).
        :param output_channels: optional replacement for ``OUTPUT_CHANNELS`` (used when truthy).
        """
        # Falsy arguments keep whatever value the attribute lookup already yields.
        self.transform = transform if transform else self.transform
        self.INPUT_CHANNELS = input_channels if input_channels else self.INPUT_CHANNELS
        self.OUTPUT_CHANNELS = output_channels if output_channels else self.OUTPUT_CHANNELS

        # Input/output counters start at zero.
        self._s_in = self._s_out = 0

        self._input = InputMultiplexer(self.INPUT_CHANNELS)
        self._output = OutputDemultiplexer(self.OUTPUT_CHANNELS)

        # Lifecycle flags: nothing has run yet.
        self._initialized = self._finalized = False

        # The instance is named after its class.
        self._name = self.__class__.__name__
コード例 #4
0
ファイル: __init__.py プロジェクト: hartym/rdc.etl
    def __init__(self, transform=None, input_channels=None, output_channels=None):
        """Initialise naming, channel configuration, I/O multiplexers and bookkeeping state.

        :param transform: optional callable; stored as ``self.transform`` when truthy, and its
            ``__name__`` is used as the instance name if no name is set yet.
        :param input_channels: optional override for ``INPUT_CHANNELS`` (used when truthy).
        :param output_channels: optional override for ``OUTPUT_CHANNELS`` (used when truthy).
        """
        has_callable = bool(transform)

        # Take the callable's name only when there is a callable and no name so far.
        if has_callable and not self._name:
            self._name = transform.__name__

        # Falsy overrides fall back to the value the attribute lookup already gives.
        self.INPUT_CHANNELS = input_channels if input_channels else self.INPUT_CHANNELS
        self.OUTPUT_CHANNELS = output_channels if output_channels else self.OUTPUT_CHANNELS

        self._input = InputMultiplexer(self.INPUT_CHANNELS)
        self._output = OutputDemultiplexer(self.OUTPUT_CHANNELS)

        # Accumulated execution time and number of executions.
        self._exec_time = 0.0
        self._exec_count = 0

        # Lifecycle flags, all unset at construction.
        self._booted = self._initialized = self._finalized = False

        self.transform = transform if has_callable else self.transform
コード例 #5
0
ファイル: io_test.py プロジェクト: yelord/rdc.etl
    def test_multiple_input(self):
        """Check activation rules and read order of an InputMultiplexer with two channels."""
        imux = InputMultiplexer([CH1, CH2])

        def feed(channel, *items):
            # Put each item, in order, on the given channel of the multiplexer.
            for item in items:
                imux[channel].put(item)

        def expect_next(value, channel):
            # The next read must yield exactly this (value, channel) pair.
            self.assertEqual(imux.get(), (value, channel))

        # Right after creation, neither channel accepts a value.
        self.assertRaises(InactiveWritableError, imux[CH1].put, 'foo')
        self.assertRaises(InactiveWritableError, imux[CH2].put, 'foo')

        # Send Begin on the first channel and queue two values.
        feed(CH1, Begin, 'foo', 'foo-oo')

        # A single read.
        expect_next('foo', CH1)

        feed(CH1, End)

        # Second channel: open, fill, close.
        feed(CH2, Begin, 'bar', 'ba-ar', End)

        expect_next('foo-oo', CH1)
        expect_next('bar', CH2)
        expect_next('ba-ar', CH2)

        # Nothing left to read once both channels have ended.
        self.assertRaises(InactiveReadableError, imux.get)
コード例 #6
0
ファイル: __init__.py プロジェクト: fabienpomerol/etl
class Transform(ITransform):
    """Base class for transformations, also usable as a decorator around a callable.

    .. automethod:: transform

    .. attribute:: INPUT_CHANNELS

        Names of the input channels.

    .. attribute:: OUTPUT_CHANNELS

        Names of the output channels.

    Example::

        >>> @Transform
        ... def my_transform(hash, channel=STDIN):
        ...     yield hash.copy({'foo': hash['foo'].upper()})

        >>> print list(my_transform(
        ...         H(('foo', 'bar'), ('bar', 'alpha')),
        ...         H(('foo', 'baz'), ('bar', 'omega')),
        ...     ))
        [H{'foo': 'BAR', 'bar': 'alpha'}, H{'foo': 'BAZ', 'bar': 'omega'}]

    """

    INPUT_CHANNELS = (STDIN, )
    OUTPUT_CHANNELS = (STDOUT, STDERR, )

    def __init__(self, transform=None, input_channels=None, output_channels=None):
        """Store the transform callable and set up channels, counters, flags and name.

        :param transform: optional callable stored as ``self.transform`` (used when truthy).
        :param input_channels: optional replacement for ``INPUT_CHANNELS`` (used when truthy).
        :param output_channels: optional replacement for ``OUTPUT_CHANNELS`` (used when truthy).
        """
        # Falsy arguments keep whatever value the attribute lookup already yields.
        self.transform = transform if transform else self.transform
        self.INPUT_CHANNELS = input_channels if input_channels else self.INPUT_CHANNELS
        self.OUTPUT_CHANNELS = output_channels if output_channels else self.OUTPUT_CHANNELS

        # Counters for rows read and rows emitted.
        self._s_in = self._s_out = 0

        self._input = InputMultiplexer(self.INPUT_CHANNELS)
        self._output = OutputDemultiplexer(self.OUTPUT_CHANNELS)

        # Lifecycle flags: nothing has run yet.
        self._initialized = self._finalized = False

        self._name = self.__class__.__name__

    def __call__(self, *stream, **options):
        """Lazily run ``transform`` over every item of ``stream`` and yield each result.

        Items that are not ``Hash`` instances are wrapped in ``Hash`` first. The channel handed
        to ``transform`` is the ``channel`` keyword option, or ``STDIN`` when it is not given.
        """
        channel = options.get('channel', STDIN)

        for row in stream:
            row = row if isinstance(row, Hash) else Hash(row)

            for line in self.transform(row, channel):
                yield line

    # ITransform implementation

    def transform(self, hash, channel=STDIN):
        """Core transformation method, called for each input data row. Must be overridden."""
        raise NotImplementedError('Abstract.')

    # IO related

    def step(self, finalize=False):
        """Read one item from the input and run the transformation on it.

        On the first call, ``initialize`` is executed beforehand. When ``finalize`` is true and
        finalization has not happened yet, ``finalize`` is executed and ``End`` is sent to all
        outputs, even if reading or transforming raised.
        """
        if not self._initialized:
            self._initialized = True
            self.__execute_and_handle_output(self.initialize)

        try:
            # Take the next item from the first available input channel (blocking).
            row, source = self._input.get()
            self._s_in += 1
            # Run the actual transformation on it.
            self.__execute_and_handle_output(self.transform, row, source)
        finally:
            if finalize and not self._finalized:
                self._finalized = True
                self.__execute_and_handle_output(self.finalize)
                self._output.put_all(End)

    def initialize(self):
        """Hook for code that has to run before any item is transformed."""
        pass

    def finalize(self):
        """Hook for code that has to run after all items are transformed. Especially useful for
        buffering transformations, or other blocking types."""
        pass

    @property
    def virgin(self):
        """True as long as this transformation has been neither initialized nor finalized."""
        return not (self._initialized or self._finalized)

    # Name, statistics and representation logic. Basic, but important for visualisation and debugging.

    @property
    def __name__(self):
        return self._name

    @__name__.setter
    def __name__(self, value):
        self._name = value

    def get_stats(self):
        """Return an ordered mapping holding the ``in`` and ``out`` counters, in that order."""
        stats = OrderedDict()
        stats['in'] = self._s_in
        stats['out'] = self._s_out
        return stats

    def get_stats_as_string(self):
        """Render the stats as space-separated ``name=value`` pairs."""
        parts = []
        for key, value in self.get_stats().items():
            parts.append('%s=%d' % (key, value))
        return ' '.join(parts)

    def __repr__(self):
        return ''.join(('<', self.__name__, ' ', self.get_stats_as_string(), '>'))

    # Private

    def __execute_and_handle_output(self, callable, *args, **kwargs):
        """Call ``callable`` with the given args/kwargs and forward what it produced to the output.

        A generator result is consumed item by item; any other non-``None`` result is forwarded
        as a single item; ``None`` forwards nothing. The output counter grows by one per item.
        """
        outcome = callable(*args, **kwargs)

        if outcome is None:
            return

        # Treat a plain return value as a one-item sequence so both cases share one loop.
        if not isinstance(outcome, types.GeneratorType):
            outcome = (outcome, )

        for item in outcome:
            # todo better stats
            self._s_out += 1
            self._output.put(item)
コード例 #7
0
ファイル: __init__.py プロジェクト: hartym/rdc.etl
class Transform(ITransform, Statisticable):
    """Base class and decorator for transformations.

    .. automethod:: transform

    .. attribute:: INPUT_CHANNELS

        List of input channel names.

    .. attribute:: OUTPUT_CHANNELS

        List of output channel names

    Example::

        >>> @Transform
        ... def my_transform(hash, channel=STDIN):
        ...     yield hash.copy({'foo': hash['foo'].upper()})

        >>> print list(my_transform(
        ...         H(('foo', 'bar'), ('bar', 'alpha')),
        ...         H(('foo', 'baz'), ('bar', 'omega')),
        ...     ))
        [H{'foo': 'BAR', 'bar': 'alpha'}, H{'foo': 'BAZ', 'bar': 'omega'}]

    """

    INPUT_CHANNELS = (STDIN, )
    OUTPUT_CHANNELS = (STDOUT, STDERR, )
    _name = None

    def __init__(self, transform=None, input_channels=None, output_channels=None):
        """Set up naming, channels, I/O multiplexers, execution counters and lifecycle flags.

        :param transform: optional callable stored as ``self.transform`` (used when truthy).
        :param input_channels: optional replacement for ``INPUT_CHANNELS`` (used when truthy).
        :param output_channels: optional replacement for ``OUTPUT_CHANNELS`` (used when truthy).
        """
        # Use the callable name if provided. Callables without a ``__name__`` attribute
        # (``functools.partial`` objects for instance) leave the name unset, so the
        # ``__name__`` property falls back to the class name instead of raising here.
        if transform and not self._name:
            self._name = getattr(transform, '__name__', None)

        self.INPUT_CHANNELS = input_channels or self.INPUT_CHANNELS
        self.OUTPUT_CHANNELS = output_channels or self.OUTPUT_CHANNELS

        self._input = InputMultiplexer(self.INPUT_CHANNELS)
        self._output = OutputDemultiplexer(self.OUTPUT_CHANNELS)
        self._exec_time = 0.0
        self._exec_count = 0

        self._booted = False
        self._initialized = False
        self._finalized = False

        self.transform = transform or self.transform

    def __call__(self, *stream, **options):
        """Lazily run ``transform`` over every item of ``stream`` and yield each result.

        Items that are not ``Hash`` instances are wrapped in ``Hash`` first. The channel handed
        to ``transform`` is the ``channel`` keyword option, or ``STDIN`` when it is not given.
        """
        channel = options.get('channel', STDIN)

        for hash in stream:
            if not isinstance(hash, Hash):
                hash = Hash(hash)

            for line in self.transform(hash, channel):
                yield line

    # ITransform implementation

    def transform(self, hash, channel=STDIN):
        """Core transformation method that will be called for each input data row."""
        raise AbstractError(self.transform)

    # IO related

    def step(self, finalize=False):
        """Read one item from the input and run the transformation on it.

        ``boot`` and ``initialize`` are each run once, on the first call. When ``finalize`` is
        true and finalization has not happened yet, ``finalize`` is executed and ``End`` is sent
        to all outputs, even if reading or transforming raised.
        """
        if not self._booted:
            # todo find something to make this work
            self.boot()
            self._booted = True

        if not self._initialized:
            self._initialized = True
            self.__execute_and_handle_output(self.initialize)

        try:
            # Pull data from the first available input channel (blocking)
            data, channel = self._input.get(timeout=1)
            # Execute actual transformation
            self.__execute_and_handle_output(self.transform, data, channel)
        finally:
            if finalize and not self._finalized:
                self._finalized = True
                self.__execute_and_handle_output(self.finalize)
                self._output.put_all(End)

    def boot(self):
        """Just before transformation is started, validate everything is ready."""
        pass

    def initialize(self):
        """If you need to execute code before any item is transformed, this is the place."""
        pass

    def finalize(self):
        """If you need to execute code after all items are transformed, this is the place. It's especially useful for
        buffering transformations, or other blocking types."""
        pass

    @property
    def virgin(self):
        """True as long as this transformation has been neither initialized nor finalized."""
        return not self._initialized and not self._finalized

    # Name, statistics and representation logic. This is basic but important, as it will serve visualisation/debugging
    # purpose.

    @property
    def __name__(self):
        return self._name or type(self).__name__

    @__name__.setter
    def __name__(self, value):
        self._name = value

    def get_local_stats(self, debug=False, profile=False):
        """Return this transform's own ``(label, value)`` stat pairs.

        Pairs are only produced when ``profile`` is true: total execution time, execution count,
        mean time per execution and executions per second. The two ratios read ``∞`` when their
        divisor is not positive. ``debug`` is accepted but not used here.
        """
        if profile:
            return (
                (u'τ', '%.2fs' % (self._exec_time, ), ),
                (u'ε', self._exec_count, ),
                (
                    u'τ.ε⁻¹',
                    (u'%.1fms' % (1000 * self._exec_time / self._exec_count, ))
                    if self._exec_count > 0 else u'∞',
                ),
                (
                    u'ε.τ⁻¹',
                    (u'%.1f/s' % (self._exec_count / self._exec_time, ))
                    if self._exec_time > 0 else u'∞',
                ),
            )
        return ()

    def get_stats(self, debug=False, profile=False):
        """Return a generator of ``(name, stat)`` pairs: input stats, output stats, then local stats."""
        stats = itertools.chain(
            self._input.stats,
            self._output.stats,
            self.get_local_stats(debug=debug, profile=profile)
        )
        return (
            (name, stat, ) for name, stat in stats
        )

    def __repr__(self):
        # NOTE(review): get_unicode_stats is not defined in this class; presumably it is
        # provided by Statisticable -- confirm.
        return u'<{0} {1}>'.format(self.__name__, self.get_unicode_stats())

    # Private
    def __execute_and_handle_output(self, callable, *args, **kwargs):
        """Runs a transformation callable with given args/kwargs and flush the result into the right
        output channel.

        The time spent in the call, and in each step that produces an item of a generator
        result, is added to the execution time. The execution count grows by one per generator
        item, or by one for a plain (possibly ``None``) result.
        """

        timer = Timer()
        with timer:
            results = callable(*args, **kwargs)
        self._exec_time += timer.duration

        # Put data onto output channels
        if isinstance(results, types.GeneratorType):
            while True:
                timer = Timer()
                with timer:
                    try:
                        # Built-in next() works on Python 2.6+ and 3.x, unlike the
                        # Python-2-only ``results.next()`` method.
                        result = next(results)
                    except StopIteration:
                        break
                self._exec_time += timer.duration
                self._exec_count += 1
                self._output.put(result)
        elif results is not None:
            self._exec_count += 1
            self._output.put(results)
        else:
            self._exec_count += 1
コード例 #8
0
class Transform(ITransform, Statisticable):
    """Base class and decorator for transformations.

    .. automethod:: transform

    .. attribute:: INPUT_CHANNELS

        List of input channel names.

    .. attribute:: OUTPUT_CHANNELS

        List of output channel names

    Example::

        >>> @Transform
        ... def my_transform(hash, channel=STDIN):
        ...     yield hash.copy({'foo': hash['foo'].upper()})

        >>> print list(my_transform(
        ...         H(('foo', 'bar'), ('bar', 'alpha')),
        ...         H(('foo', 'baz'), ('bar', 'omega')),
        ...     ))
        [H{'foo': 'BAR', 'bar': 'alpha'}, H{'foo': 'BAZ', 'bar': 'omega'}]

    """

    INPUT_CHANNELS = (STDIN, )
    OUTPUT_CHANNELS = (STDOUT, STDERR, )
    _name = None

    def __init__(self, transform=None, input_channels=None, output_channels=None):
        """Set up naming, channels, I/O multiplexers, execution counters and lifecycle flags.

        :param transform: optional callable stored as ``self.transform`` (used when truthy).
        :param input_channels: optional replacement for ``INPUT_CHANNELS`` (used when truthy).
        :param output_channels: optional replacement for ``OUTPUT_CHANNELS`` (used when truthy).
        """
        # Use the callable name if provided. Callables without a ``__name__`` attribute
        # (``functools.partial`` objects for instance) leave the name unset, so the
        # ``__name__`` property falls back to the class name instead of raising here.
        if transform and not self._name:
            self._name = getattr(transform, '__name__', None)

        self.INPUT_CHANNELS = input_channels or self.INPUT_CHANNELS
        self.OUTPUT_CHANNELS = output_channels or self.OUTPUT_CHANNELS

        self._input = InputMultiplexer(self.INPUT_CHANNELS)
        self._output = OutputDemultiplexer(self.OUTPUT_CHANNELS)
        self._exec_time = 0.0
        self._exec_count = 0

        self._booted = False
        self._initialized = False
        self._finalized = False

        self.transform = transform or self.transform

    def __call__(self, *stream, **options):
        """Lazily run ``transform`` over every item of ``stream`` and yield each result.

        Items that are not ``Hash`` instances are wrapped in ``Hash`` first. The channel handed
        to ``transform`` is the ``channel`` keyword option, or ``STDIN`` when it is not given.
        """
        channel = options.get('channel', STDIN)

        for hash in stream:
            if not isinstance(hash, Hash):
                hash = Hash(hash)

            for line in self.transform(hash, channel):
                yield line

    # ITransform implementation

    def transform(self, hash, channel=STDIN):
        """Core transformation method that will be called for each input data row."""
        raise AbstractError(self.transform)

    # IO related

    def step(self, finalize=False):
        """Read one item from the input and run the transformation on it.

        ``boot`` and ``initialize`` are each run once, on the first call. When ``finalize`` is
        true and finalization has not happened yet, ``finalize`` is executed and ``End`` is sent
        to all outputs, even if reading or transforming raised.
        """
        if not self._booted:
            # todo find something to make this work
            self.boot()
            self._booted = True

        if not self._initialized:
            self._initialized = True
            self.__execute_and_handle_output(self.initialize)

        try:
            # Pull data from the first available input channel (blocking)
            data, channel = self._input.get()
            # Execute actual transformation
            self.__execute_and_handle_output(self.transform, data, channel)
        finally:
            if finalize and not self._finalized:
                self._finalized = True
                self.__execute_and_handle_output(self.finalize)
                self._output.put_all(End)

    def boot(self):
        """Just before transformation is started, validate everything is ready."""
        pass

    def initialize(self):
        """If you need to execute code before any item is transformed, this is the place."""
        pass

    def finalize(self):
        """If you need to execute code after all items are transformed, this is the place. It's especially useful for
        buffering transformations, or other blocking types."""
        pass

    @property
    def virgin(self):
        """True as long as this transformation has been neither initialized nor finalized."""
        return not self._initialized and not self._finalized

    # Name, statistics and representation logic. This is basic but important, as it will serve visualisation/debugging
    # purpose.

    @property
    def __name__(self):
        return self._name or type(self).__name__

    @__name__.setter
    def __name__(self, value):
        self._name = value

    def get_local_stats(self, debug=False, profile=False):
        """Return this transform's own ``(label, value)`` stat pairs.

        Pairs are only produced when ``profile`` is true: total execution time, execution count,
        mean time per execution and executions per second. The two ratios read ``∞`` when their
        divisor is not positive. ``debug`` is accepted but not used here.
        """
        if profile:
            return (
                (u'τ', '%.2fs' % (self._exec_time, ), ),
                (u'ε', self._exec_count, ),
                (
                    u'τ.ε⁻¹',
                    (u'%.1fms' % (1000 * self._exec_time / self._exec_count, ))
                    if self._exec_count > 0 else u'∞',
                ),
                (
                    u'ε.τ⁻¹',
                    (u'%.1f/s' % (self._exec_count / self._exec_time, ))
                    if self._exec_time > 0 else u'∞',
                ),
            )
        return ()

    def get_stats(self, debug=False, profile=False):
        """Return a generator of ``(name, stat)`` pairs: input stats, output stats, then local stats."""
        stats = itertools.chain(
            self._input.stats,
            self._output.stats,
            self.get_local_stats(debug=debug, profile=profile)
        )
        return (
            (name, stat, ) for name, stat in stats
        )

    def __repr__(self):
        # NOTE(review): get_unicode_stats is not defined in this class; presumably it is
        # provided by Statisticable -- confirm.
        return u'<{0} {1}>'.format(self.__name__, self.get_unicode_stats())

    # Private
    def __execute_and_handle_output(self, callable, *args, **kwargs):
        """Runs a transformation callable with given args/kwargs and flush the result into the right
        output channel.

        The time spent in the call, and in each step that produces an item of a generator
        result, is added to the execution time. The execution count grows by one per generator
        item, or by one for a plain (possibly ``None``) result.
        """

        timer = Timer()
        with timer:
            results = callable(*args, **kwargs)
        self._exec_time += timer.duration

        # Put data onto output channels
        if isinstance(results, types.GeneratorType):
            while True:
                timer = Timer()
                with timer:
                    try:
                        # Built-in next() works on Python 2.6+ and 3.x, unlike the
                        # Python-2-only ``results.next()`` method.
                        result = next(results)
                    except StopIteration:
                        break
                self._exec_time += timer.duration
                self._exec_count += 1
                self._output.put(result)
        elif results is not None:
            self._exec_count += 1
            self._output.put(results)
        else:
            self._exec_count += 1