Example #1
0
from rdc.etl.extra.example import Harness, build_producer, run
from rdc.etl.io import STDIN2
from rdc.etl.status.console import ConsoleStatus
from rdc.etl.transform.flow.sort import Sort
from rdc.etl.transform.flow.sortedjoin import SortedJoin
from rdc.etl.transform.util import Log

# --- Demo: software sort ---------------------------------------------------
print('#################')
print('# Software sort #')
print('#################')
# print('') instead of a bare `print`: the bare name is a silent no-op on
# Python 3, while print('') emits the blank line on Python 2 and 3 alike.
print('')
print('Producer -> Sort -> Log')

h = Harness()
# Attach a ConsoleStatus object to the harness before running it.
h.status.append(ConsoleStatus())
p1 = build_producer('Producer 1')
# Single chain: the producer feeds Sort (keyed on 'id'), which feeds Log.
h.add_chain(p1, Sort(key=('id', )), Log())
run(h)

# --- Demo: sorted join -----------------------------------------------------
print('###############')
print('# Sorted Join #')
print('###############')
# print('') instead of a bare `print`: the bare name is a silent no-op on
# Python 3, while print('') emits the blank line on Python 2 and 3 alike.
print('')
print("Producer1 -> Sort -(stdin)---> SortedJoin --> Log")
print("Producer2 -> Sort -(stdin2)-'")

# A fresh harness for this demo; `h` and `p1` are rebound here.
h = Harness()
h.status.append(ConsoleStatus())
p1 = build_producer('Producer 1')
p2 = build_producer('Producer 2',
                    get_value=lambda id: int(id) * 42,
Example #2
0
# limitations under the License.

from rdc.etl.extra.example import Harness, build_producer, build_simple_transform, run
from rdc.etl.io import STDOUT, STDOUT2
from rdc.etl.transform.flow.split import Split
from rdc.etl.transform.util import Log


# --- Demo: linear shape ----------------------------------------------------
print('################')
print('# Linear shape #')
print('################')
# print('') instead of a bare `print`: the bare name is a silent no-op on
# Python 3, while print('') emits the blank line on Python 2 and 3 alike.
print('')
print('Producer -> SimpleTransform -> Log')

h = Harness()
p1 = build_producer('Producer 1')
# Single chain: producer -> simple transform -> Log.
h.add_chain(p1, build_simple_transform(), Log())
run(h)


# --- Demo: split shape (two outputs) ---------------------------------------
print('#####################################')
print('# Split shape (2 different outputs) #')
print('#####################################')
# print('') instead of a bare `print`: the bare name is a silent no-op on
# Python 3, while print('') emits the blank line on Python 2 and 3 alike.
print('')
print('Producer -> Split ---(stdout)--> SimpleTransform1 -> Log1')
print('                   `-(stdout2)-> SimpleTransform2 -> Log2')

h = Harness()
producer = build_producer('Producer 1', 10)
# Select the output from the parity of 'id': a truthy `id % 2` picks STDOUT2,
# otherwise STDOUT.  A conditional expression replaces the `cond and a or b`
# idiom, which would fall through to STDOUT for every row if STDOUT2 were
# ever falsy.
split = Split(output_selector=lambda h: STDOUT2 if h.get('id') % 2 else STDOUT)
# First chain: producer -> split -> simple transform -> Log.
h.add_chain(producer, split, build_simple_transform(), Log())
Example #3
0
# See the License for the specific language governing permissions and
# limitations under the License.

from rdc.etl.extra.example import Harness, build_producer, build_simple_transform, run
from rdc.etl.io import STDOUT, STDOUT2
from rdc.etl.transform.flow.split import Split
from rdc.etl.transform.util import Log

# --- Demo: linear shape ----------------------------------------------------
print('################')
print('# Linear shape #')
print('################')
# A bare `print` only emits a blank line on Python 2; on Python 3 it is an
# expression that does nothing.  print('') behaves the same on both.
print('')
print('Producer -> SimpleTransform -> Log')

h = Harness()
p1 = build_producer('Producer 1')
# Single chain: producer -> simple transform -> Log.
h.add_chain(p1, build_simple_transform(), Log())
run(h)

# --- Demo: split shape (two outputs) ---------------------------------------
print('#####################################')
print('# Split shape (2 different outputs) #')
print('#####################################')
# A bare `print` only emits a blank line on Python 2; on Python 3 it is an
# expression that does nothing.  print('') behaves the same on both.
print('')
print('Producer -> Split ---(stdout)--> SimpleTransform1 -> Log1')
print('                   `-(stdout2)-> SimpleTransform2 -> Log2')

h = Harness()
producer = build_producer('Producer 1', 10)
# Select the output from the parity of 'id': a truthy `id % 2` picks STDOUT2,
# otherwise STDOUT.  Written as a conditional expression because the older
# `cond and a or b` idiom silently yields `b` whenever `a` is falsy.
split = Split(output_selector=lambda h: STDOUT2 if h.get('id') % 2 else STDOUT)
# First chain: producer -> split -> simple transform -> Log.
h.add_chain(producer, split, build_simple_transform(), Log())
h.add_chain(build_simple_transform('lower'), Log(), input=(
Example #4
0
from rdc.etl.extra.example import Harness, build_producer, run
from rdc.etl.extra.simple import SimpleTransform
from rdc.etl.transform.util import Log

# --- Demo: simplest ETL process --------------------------------------------
print('########################')
print('# Simplest ETL process #')
print('########################')
# print('') instead of a bare `print`: the bare name is a silent no-op on
# Python 3, while print('') emits the blank line on Python 2 and 3 alike.
print('')
print('Producer -> Transform -> Log')

h = Harness()
p = build_producer('Producer')
t = SimpleTransform()
# Single chain: producer -> SimpleTransform -> Log.
h.add_chain(p, t, Log())
run(h)

# After the run, print the repr of every object returned by h.get_threads(),
# one per line.  The call form is used because a Python 2 print statement is
# a SyntaxError on Python 3, and the rest of this script already writes
# print(...) with parentheses.
print('\n'.join(map(repr, h.get_threads())))


Example #5
0
from rdc.etl.extra.example import Harness, build_producer, run
from rdc.etl.io import STDIN2
from rdc.etl.status.console import ConsoleStatus
from rdc.etl.transform.flow.sort import Sort
from rdc.etl.transform.flow.sortedjoin import SortedJoin
from rdc.etl.transform.util import Log

# --- Demo: software sort ---------------------------------------------------
print('#################')
print('# Software sort #')
print('#################')
# A bare `print` only emits a blank line on Python 2; on Python 3 it is an
# expression that does nothing.  print('') behaves the same on both.
print('')
print('Producer -> Sort -> Log')

h = Harness()
# Attach a ConsoleStatus object to the harness before running it.
h.status.append(ConsoleStatus())
p1 = build_producer('Producer 1')
# Single chain: the producer feeds Sort (keyed on 'id'), which feeds Log.
h.add_chain(p1, Sort(key=('id',)), Log())
run(h)

# --- Demo: sorted join -----------------------------------------------------
print('###############')
print('# Sorted Join #')
print('###############')
# A bare `print` only emits a blank line on Python 2; on Python 3 it is an
# expression that does nothing.  print('') behaves the same on both.
print('')
print("Producer1 -> Sort -(stdin)---> SortedJoin --> Log")
print("Producer2 -> Sort -(stdin2)-'")

# A fresh harness for this demo; `h` and `p1` are rebound here.
h = Harness()
h.status.append(ConsoleStatus())
p1 = build_producer('Producer 1')
# The second producer computes its value as int(id) * 42 and is given
# value_name='price'.
p2 = build_producer('Producer 2', get_value=lambda id: int(id) * 42, value_name='price')
# Join transform keyed on 'id'.
sj = SortedJoin(key=('id', ))
Example #6
0
# -*- coding: utf-8 -*-
import time

from rdc.etl.harness.threaded import ThreadedHarness
from rdc.etl.extra.example import build_producer, run
from rdc.etl.status.console import ConsoleStatus
from rdc.etl.status.http import HttpStatus
from rdc.etl.transform import Transform

# Harness instance that the chain and the status objects are attached to.
h = ThreadedHarness()
# Producer named 'Producer 1'; count=500 is presumably the number of rows it
# emits (confirm against build_producer).
p1 = build_producer(
    'Producer 1',
    count=500,
)


@Transform
def delay(h, c):
    """Sleep for 0.2 seconds, then yield ``h`` unchanged.

    ``c`` is accepted but not used by this transform.
    """
    pause_seconds = 0.2
    time.sleep(pause_seconds)
    yield h


@Transform
def delay2(h, c):
    """Sleep for 0.5 seconds, then yield ``h`` unchanged.

    ``c`` is accepted but not used by this transform.
    """
    pause_seconds = 0.5
    time.sleep(pause_seconds)
    yield h


# Chain the producer into the two sleeping transforms, in that order.
h.add_chain(p1, delay, delay2)
# Build and attach each status object in turn: console first, then HTTP.
for _status_cls in (ConsoleStatus, HttpStatus):
    h.status.append(_status_cls())
run(h)
Example #7
0
# -*- coding: utf-8 -*-
import time

from rdc.etl.harness.threaded import ThreadedHarness
from rdc.etl.extra.example import build_producer, run
from rdc.etl.status.console import ConsoleStatus
from rdc.etl.status.http import HttpStatus
from rdc.etl.transform import Transform

# Harness instance used by the rest of this script.
h = ThreadedHarness()
# Producer named 'Producer 1', built with count=500 (presumably the number
# of rows to emit -- confirm against build_producer).
p1 = build_producer(
    'Producer 1',
    count=500,
)

@Transform
def delay(h, c):
    """Wait 0.2 seconds and then yield the received ``h`` as-is.

    The second argument, ``c``, is ignored.
    """
    wait = 0.2  # seconds
    time.sleep(wait)
    yield h

@Transform
def delay2(h, c):
    """Wait 0.5 seconds and then yield the received ``h`` as-is.

    The second argument, ``c``, is ignored.
    """
    wait = 0.5  # seconds
    time.sleep(wait)
    yield h

# One chain: producer -> delay -> delay2.
h.add_chain(p1, delay, delay2)
# Instantiate and register the console status, then the HTTP status.
for _make_status in (ConsoleStatus, HttpStatus):
    h.status.append(_make_status())
run(h)