from rdc.etl.extra.example import Harness, build_producer, run from rdc.etl.io import STDIN2 from rdc.etl.status.console import ConsoleStatus from rdc.etl.transform.flow.sort import Sort from rdc.etl.transform.flow.sortedjoin import SortedJoin from rdc.etl.transform.util import Log print('#################') print('# Software sort #') print('#################') print print('Producer -> Sort -> Log') h = Harness() h.status.append(ConsoleStatus()) p1 = build_producer('Producer 1') h.add_chain(p1, Sort(key=('id', )), Log()) run(h) print('###############') print('# Sorted Join #') print('###############') print print("Producer1 -> Sort -(stdin)---> SortedJoin --> Log") print("Producer2 -> Sort -(stdin2)-'") h = Harness() h.status.append(ConsoleStatus()) p1 = build_producer('Producer 1') p2 = build_producer('Producer 2', get_value=lambda id: int(id) * 42,
# limitations under the License.

"""Example script: a linear chain, then the start of a two-output split chain."""

from rdc.etl.extra.example import Harness, build_producer, build_simple_transform, run
from rdc.etl.io import STDOUT, STDOUT2
from rdc.etl.transform.flow.split import Split
from rdc.etl.transform.util import Log

print('################')
print('# Linear shape #')
print('################')
# print('') emits a blank line on both Python 2 and Python 3; the previous
# bare `print` only did so on Python 2 and is a silent no-op on Python 3.
print('')
print('Producer -> SimpleTransform -> Log')

h = Harness()
p1 = build_producer('Producer 1')
h.add_chain(p1, build_simple_transform(), Log())
run(h)

print('#####################################')
print('# Split shape (2 different outputs) #')
print('#####################################')
print('')
print('Producer -> Split ---(stdout)--> SimpleTransform1 -> Log1')
print(' `-(stdout2)-> SimpleTransform2 -> Log2')

h = Harness()
producer = build_producer('Producer 1', 10)
# Rows whose 'id' is odd are routed to STDOUT2, all others to STDOUT.
# A conditional expression is used instead of `cond and a or b`, which would
# silently fall through to STDOUT whenever STDOUT2 happened to be falsy.
split = Split(output_selector=lambda h: STDOUT2 if h.get('id') % 2 else STDOUT)
h.add_chain(producer, split, build_simple_transform(), Log())
# See the License for the specific language governing permissions and # limitations under the License. from rdc.etl.extra.example import Harness, build_producer, build_simple_transform, run from rdc.etl.io import STDOUT, STDOUT2 from rdc.etl.transform.flow.split import Split from rdc.etl.transform.util import Log print('################') print('# Linear shape #') print('################') print print('Producer -> SimpleTransform -> Log') h = Harness() p1 = build_producer('Producer 1') h.add_chain(p1, build_simple_transform(), Log()) run(h) print('#####################################') print('# Split shape (2 different outputs) #') print('#####################################') print print('Producer -> Split ---(stdout)--> SimpleTransform1 -> Log1') print(' `-(stdout2)-> SimpleTransform2 -> Log2') h = Harness() producer = build_producer('Producer 1', 10) split = Split(output_selector=lambda h: h.get('id') % 2 and STDOUT2 or STDOUT) h.add_chain(producer, split, build_simple_transform(), Log()) h.add_chain(build_simple_transform('lower'), Log(), input=(
"""Example script: the simplest chain, Producer -> Transform -> Log."""

from rdc.etl.extra.example import Harness, build_producer, run
from rdc.etl.extra.simple import SimpleTransform
from rdc.etl.transform.util import Log

print('########################')
print('# Simplest ETL process #')
print('########################')
# print('') emits a blank line on both Python 2 and Python 3; the previous
# bare `print` only did so on Python 2 and is a silent no-op on Python 3.
print('')
print('Producer -> Transform -> Log')

h = Harness()
p = build_producer('Producer')
t = SimpleTransform()
h.add_chain(p, t, Log())
run(h)

# Print the repr() of each item returned by h.get_threads(), one per line.
# Written as a call so it parses on Python 3 as well; on Python 2 the
# parenthesised single argument prints exactly what the print statement did.
print('\n'.join(map(repr, h.get_threads())))
"""Example script: an in-memory sort chain, then the setup for a sorted join."""

from rdc.etl.extra.example import Harness, build_producer, run
from rdc.etl.io import STDIN2
from rdc.etl.status.console import ConsoleStatus
from rdc.etl.transform.flow.sort import Sort
from rdc.etl.transform.flow.sortedjoin import SortedJoin
from rdc.etl.transform.util import Log

print('#################')
print('# Software sort #')
print('#################')
# print('') emits a blank line on both Python 2 and Python 3; the previous
# bare `print` only did so on Python 2 and is a silent no-op on Python 3.
print('')
print('Producer -> Sort -> Log')

h = Harness()
h.status.append(ConsoleStatus())
p1 = build_producer('Producer 1')
h.add_chain(p1, Sort(key=('id',)), Log())
run(h)

print('###############')
print('# Sorted Join #')
print('###############')
print('')
print("Producer1 -> Sort -(stdin)---> SortedJoin --> Log")
print("Producer2 -> Sort -(stdin2)-'")

# A fresh harness is created for the second demonstration.
h = Harness()
h.status.append(ConsoleStatus())
p1 = build_producer('Producer 1')
# The second producer computes its value as int(id) * 42 and names it 'price'.
p2 = build_producer('Producer 2',
                    get_value=lambda id: int(id) * 42,
                    value_name='price')
# Join keyed on 'id'; how it is wired into the harness is not shown here.
sj = SortedJoin(key=('id',))
# -*- coding: utf-8 -*-
# Example script: a threaded harness running one producer through two
# artificially slow pass-through transforms, with console and HTTP status.

import time

from rdc.etl.harness.threaded import ThreadedHarness
from rdc.etl.extra.example import build_producer, run
from rdc.etl.status.console import ConsoleStatus
from rdc.etl.status.http import HttpStatus
from rdc.etl.transform import Transform

h = ThreadedHarness()
p1 = build_producer('Producer 1', count=500)


@Transform
def delay(h, c):
    # Sleep for 0.2 seconds, then yield the first argument unchanged.
    time.sleep(0.2)
    yield h


@Transform
def delay2(h, c):
    # Sleep for 0.5 seconds, then yield the first argument unchanged.
    time.sleep(0.5)
    yield h


h.add_chain(p1, delay, delay2)

# Each status object is constructed and appended in turn, console first.
for status_cls in (ConsoleStatus, HttpStatus):
    h.status.append(status_cls())

run(h)