def initialize(cash=None):
    """Initialize the dashboard, data storage, and account balances.

    ``cash`` is stored as the account's starting ``"balance"``; the share
    count starts at 0.

    Returns the tuple
    ``(db, account, data_stream, signals_stream, dashboard)``.
    """

    def _empty_frame(column_names):
        # Zero-row frame on an empty DatetimeIndex, used as a schema example.
        return pd.DataFrame(
            data={name: [] for name in column_names},
            columns=column_names,
            index=pd.DatetimeIndex([]),
        )

    # Database: remove any existing "data" table.
    db = sqlite3.connect("algo_trader_history.sqlite")
    with db:
        db.cursor().execute("DROP TABLE IF EXISTS data")

    # Account
    account = {"balance": cash, "shares": 0}

    # Streaming DataFrame for raw data
    data_stream = Stream()
    data_stream_df = DataFrame(data_stream, example=_empty_frame(["close"]))

    # Streaming DataFrame for signals
    signals_stream = Stream()
    signal_columns = ["close", "signal", "sma10", "sma20", "entry/exit"]
    signals_stream_df = DataFrame(
        signals_stream, example=_empty_frame(signal_columns)
    )

    # Dashboard built from both streaming frames
    dashboard = build_dashboard(data_stream_df, signals_stream_df)

    return db, account, data_stream, signals_stream, dashboard
def test_sink_with_args_and_kwargs():
    """Extra arguments given to ``sink`` are passed on to the sink callback."""
    collected = {}

    def mycustomsink(elem, key, prefix=""):
        # Group each element under the prefixed key.
        collected.setdefault(prefix + key, []).append(elem)

    s = Stream()
    s2 = s.sink(mycustomsink, "cat", "super")  # sink node kept bound, as before

    for value in (1, 2):
        s.emit(value)

    assert collected['supercat'] == [1, 2]
async def test_non_unique_emit(c, s, a, b):
    """Regression for https://github.com/python-streamz/streams/issues/397

    Emitting the same value several times must still produce one processed
    result per emit.
    """
    source = Stream(asynchronous=True)
    scattered = source.scatter()
    futures = scattered.map(lambda x: random.random())
    gathered = futures.gather()
    L = gathered.sink_to_list()

    # Emit the identical value three times.
    remaining = 3
    while remaining:
        await source.emit(0)
        remaining -= 1

    assert len(L) == 3
    first, second, third = L[0], L[1], L[2]
    assert first != second or first != third
def test_basic():
    """Two ``map`` branches off one source; one branch feeds a ``scan``."""
    source = Stream()
    incremented = source.map(inc)
    doubled = source.map(double)
    running_total = incremented.scan(add)

    Lc = running_total.sink_to_list()
    Lb = doubled.sink_to_list()

    value = 0
    while value < 4:
        source.emit(value)
        value += 1

    assert Lc == [1, 3, 6, 10]
    assert Lb == [0, 2, 4, 6]
def test_merge():
    '''Zip two streams of StreamDoc's and check the result of mapping ``merge``.'''
    left = Stream()
    right = Stream()
    zipped = left.zip(right)
    L = zipped.map(merge).sink_to_list()

    sdoc1 = StreamDoc(args=[1, 2], kwargs={'a': 1, 'c': 3})
    sdoc2 = StreamDoc(args=[3, 4], kwargs={'b': 2, 'c': 4})

    # Nothing is produced until both sides of the zip have emitted.
    left.emit(sdoc1)
    assert len(L) == 0
    right.emit(sdoc2)

    result_kwargs = L[0]['kwargs']
    result_args = L[0]['args']

    # 'c' appears in both documents; the asserted value is the second one's.
    for key, expected in (('a', 1), ('b', 2), ('c', 4)):
        assert result_kwargs[key] == expected
    assert result_args == [1, 2, 3, 4]
def test_combine_latest_metadata():
    """Check the metadata emitted downstream of ``combine_latest``."""
    a = Stream()
    b = Stream()
    combined = a.combine_latest(b)
    L = metadata(combined).sink_to_list()

    expected = [
        # first emit when 2 is introduced
        [{'v': 1}, {'v': 2}],
        # 3 has no metadata but it replaces the value on 'b'
        [{'v': 1}],
        # 4 replaces the value without metadata on 'b'
        [{'v': 1}, {'v': 4}],
    ]

    a.emit(1, metadata=[{'v': 1}])
    b.emit(2, metadata=[{'v': 2}])
    b.emit(3)
    b.emit(4, metadata=[{'v': 4}])

    assert L == expected
def test_collect_metadata():
    """Check the metadata emitted on each ``flush`` of a ``collect`` node."""
    source = Stream()
    collector = source.collect()
    L = metadata(collector).sink_to_list()

    # First batch: element 0 is emitted without metadata.
    source.emit(0)
    for v in (1, 2):
        source.emit(v, metadata=[{'v': v}])
    collector.flush()

    # Second batch: every element carries metadata.
    for v in (3, 4):
        source.emit(v, metadata=[{'v': v}])
    collector.flush()

    first_flush = [{'v': 1}, {'v': 2}]   # Flush 0-2, but 0 has no metadata
    second_flush = [{'v': 3}, {'v': 4}]  # Flush the rest
    assert L == [first_flush, second_flush]
def test_stream_accumulate():
    '''Exercise the StreamDoc dispatch around an ``accumulate`` node.

    The collected output is only printed; nothing is asserted.
    '''
    def myacc(prevstate, newstate):
        return prevstate + newstate

    s = Stream()
    wrapped_acc = psda(myacc)
    sout = s.accumulate(wrapped_acc)
    L = sout.sink_to_list()

    # NOTE(review): documents are emitted on ``sout`` (the accumulate node)
    # rather than on the source ``s`` -- confirm this is intended.
    for value in (1, 2, 3):
        sout.emit(StreamDoc(args=[value]))

    print(L)
def test_delay():
    """Elements passed through ``delay(0.02)`` arrive late, but all arrive."""
    source = Stream(asynchronous=True)
    delayed = source.delay(0.02)
    L = delayed.sink_to_list()

    for item in range(5):
        yield source.emit(item)

    # Right after emitting, nothing has reached the sink yet.
    assert not L

    yield gen.sleep(0.04)
    received = len(L)
    assert received < 5

    yield gen.sleep(0.1)
    received = len(L)
    assert received == 5
async def execute():
    """Emit each request in ``unique_data`` into a scatter/map/gather pipeline.

    Each request is mapped through ``load_bars``, rate limited at
    ``'500ms'``, gathered, and handed to ``process_bars``.
    """
    price_request = {
        "type": "price",
        "base": "ETC",
        "trade": "BTC",
        "exchange": "binance",
        "period": "minute",
        "timestamp": time_1,
    }
    unique_data = [price_request]

    # The client is created but not referenced again in this function;
    # presumably ``scatter`` relies on it being active -- TODO confirm.
    client = await Client(processes=True, asynchronous=True)

    source = Stream(asynchronous=True)
    loaded = source.scatter().map(load_bars)
    throttled = loaded.rate_limit('500ms')
    throttled.gather().sink(process_bars)

    for data in unique_data:
        await source.emit(data)
def test_sliding_window():
    """``sliding_window(2)`` with and without the leading partial window."""
    source = Stream()
    full_pairs = [(i, i + 1) for i in range(9)]  # (0, 1) ... (8, 9)

    # Default: the first, partially filled window is emitted as well.
    L = source.sliding_window(2).sink_to_list()
    for i in range(10):
        source.emit(i)
    assert L == [(0, )] + full_pairs

    # return_partial=False: only complete windows are emitted.
    L = source.sliding_window(2, return_partial=False).sink_to_list()
    for i in range(10):
        source.emit(i)
    assert L == full_pairs
async def save_to_database(nursery_url, conn, partition_size=100):
    """Receive messages from ``nursery_url`` forever and insert them in batches.

    Each message is JSON-decoded, flattened, mapped through
    ``flatten_record``, grouped into partitions of ``partition_size``,
    turned into a ``pd.DataFrame`` and passed to
    ``insert_dataframe("logs", conn, <frame>)``.
    """
    with Sub0(dial=nursery_url) as sub:
        sub.subscribe(b"")  # Subscribe to everything

        stream = Stream(asynchronous=False)
        records = stream.map(ujson.loads).flatten().map(flatten_record)
        frames = records.partition(partition_size).map(pd.DataFrame)
        frames.sink(partial(insert_dataframe, "logs", conn))

        # Runs until cancelled or until receiving raises.
        while True:
            message = await sub.arecv()
            stream.emit(message)
def test_separate_thread_with_time(clean):
    """On a non-asynchronous stream, ``emit`` blocks until a coroutine sink ends."""
    written = []

    @gen.coroutine
    def slow_write(x):
        yield gen.sleep(0.1)
        written.append(x)

    source = Stream(asynchronous=False)
    incremented = source.map(inc)
    incremented.sink(slow_write)

    start = time()
    source.emit(1)
    elapsed = time() - start

    # The emit call must have lasted at least as long as the sink's sleep.
    assert elapsed > 0.1
    assert written == [2]
def init_plot(self):
    """Create the "Best solution" labels plot and the "Best value found" plot.

    Sets ``frame_pipe``, ``frame_dmap``, ``stream``, ``buffer_df`` and
    ``score_dmap`` on ``self``.
    """
    # Labels plot driven by a Pipe that starts with no data.
    self.frame_pipe = Pipe(data=[])
    self.frame_dmap = hv.DynamicMap(hv.Labels, streams=[self.frame_pipe])
    frame_options = dict(
        xlim=(-10, 10),
        ylim=(0.5, 2.5),
        height=200,
        width=500,
        xaxis=None,
        yaxis=None,
        title="Best solution",
    )
    self.frame_dmap = self.frame_dmap.opts(**frame_options)

    # Streaming frame with a single "reward" column, plotted via hvplot.
    example = pd.DataFrame({"reward": []})
    self.stream = Stream()
    self.buffer_df = DataFrame(stream=self.stream, example=example)
    reward_plot = self.buffer_df.hvplot(y=["reward"])
    self.score_dmap = reward_plot.opts(
        height=200, width=400, title="Best value found")
async def listen(self, symbol):
    """Set up raw-stream capture to a gzip file when ``self.raw_stream`` is set.

    When ``self.raw_stream`` is not ``None`` it is read as a directory (an
    empty string selects ``user_cache_dir(LIBRARY_NAME)``). A file name is
    built from the exchange, ``symbol`` and the current time, and
    ``self.raw_stream`` is then replaced by a ``Stream`` whose partitions of
    ``self.batch_size`` packets are appended to that file.
    """
    # NOTE(review): this block arrived with its indentation collapsed; the
    # nesting below is reconstructed from data flow -- confirm against the
    # original file.
    if self.raw_stream is not None:
        if self.raw_stream == '':
            # Empty string: fall back to the user cache directory.
            from appdirs import user_cache_dir
            self.raw_stream = user_cache_dir(LIBRARY_NAME)
        # Timestamped file name, e.g. <exchange>_<symbol>_<date>.json.gz
        date = time.strftime('%Y%m%dT%H%M%S')
        filename = f'{self.exchange}_{symbol}_{date}.json.gz'
        raw_stream_path = str(Path(self.raw_stream) / filename)
        logger.info(f'Writing raw stream to {raw_stream_path} ...')

        def write_to_file(batch):
            # Append the batch to the gzip file in text mode, one packet per line.
            logger.info(f'Writing batch of {len(batch)} for {symbol} ...')
            with gzip.open(raw_stream_path, 'at') as f:
                for packet in batch:
                    f.write(packet + '\n')

        # From here on ``self.raw_stream`` is a Stream, not a directory path.
        self.raw_stream = Stream()
        (self.raw_stream.partition(self.batch_size).sink(write_to_file))
def test_gc():
    """A downstream node with no remaining references stops receiving data."""
    source = Stream()
    L = []
    node = source.map(L.append)

    source.emit(1)
    assert L == [1]

    # Drop the only reference held here and force a collection.
    del node
    import gc
    gc.collect()

    # Wait (at most about one second) for the source to lose its downstream.
    start = time()
    deadline = start + 1
    while source.downstreams:
        sleep(0.01)
        assert time() < deadline

    # The second emit must not reach the collected node.
    source.emit(2)
    assert L == [1]
async def test_partition_then_scatter_async(c, s, a, b):
    """Partition with a timeout ahead of scatter on an asynchronous stream."""
    start = time.monotonic()
    source = Stream(asynchronous=True)

    batches = source.partition(2, timeout=.1)
    bumped = batches.scatter().map(lambda x: [xx + 1 for xx in x])
    L = bumped.buffer(2).gather().flatten().sink_to_list()

    rc = RefCounter(loop=source.loop)
    for i in range(3):
        await source.emit(i, metadata=[{'ref': rc}])

    # Poll until the counter reaches zero, giving up one second after ``start``.
    while True:
        if rc.count == 0 or time.monotonic() - start >= 1.:
            break
        await gen.sleep(1e-2)

    assert L == [1, 2, 3]
def test_window_aggs_with_start_state(stream):
    """Windowed sum on one stream, then on a new stream given ``start``.

    The second pipeline passes ``output0[-1][0]`` from the first run as
    ``start``.
    """
    example = pd.DataFrame({'name': [], 'amount': []})
    sdf = DataFrame(stream, example=example)
    windowed_sum = sdf.window(2, with_state=True, start=None).amount.sum()
    output0 = windowed_sum.stream.gather().sink_to_list()

    batches = (
        (['Alice', 'Tom', 'Linda'], [50, 100, 200]),
        (['Bob'], [250]),
    )
    for names, amounts in batches:
        stream.emit(pd.DataFrame({'name': names, 'amount': amounts}))
    assert output0[-1][1] == 450

    # Fresh stream, started from the first element of the last output above.
    resumed = Stream()
    example = pd.DataFrame({'name': [], 'amount': []})
    sdf = DataFrame(resumed, example=example)
    resumed_sum = sdf.window(
        2, with_state=True, start=output0[-1][0]).amount.sum()
    output1 = resumed_sum.stream.gather().sink_to_list()

    resumed.emit(pd.DataFrame({'name': ['Alice'], 'amount': [50]}))
    assert output1[-1][1] == 300
def test_partition_metadata():
    """Check the metadata emitted downstream of ``partition(2)``."""
    source = Stream()
    pairs = source.partition(2)
    L = metadata(pairs).sink_to_list()

    # Element 0 is emitted without metadata; 1..3 each carry their own.
    source.emit(0)
    for v in (1, 2, 3):
        source.emit(v, metadata=[{'v': v}])

    first = [{'v': 1}]             # first emit when 1 is introduced. 0 has no metadata
    second = [{'v': 2}, {'v': 3}]  # second emit
    assert L == [first, second]
def test_rolling_aggs_with_start_state(stream):
    """Rolling sum (cudf) on one stream, then on a new stream given ``start``.

    The second pipeline passes ``output0[-1][0]`` from the first run as
    ``start``.
    """

    def _amounts(values):
        # Expected series for comparison; index is reset on the actual side.
        return cudf.Series(values, name="amount")

    example = cudf.DataFrame({"name": [], "amount": []})
    sdf = DataFrame(stream, example=example)
    rolling_sum = sdf.rolling(2, with_state=True, start=()).amount.sum()
    output0 = rolling_sum.stream.gather().sink_to_list()

    batches = (
        (["Alice", "Tom", "Linda"], [50, 100, 200]),
        (["Bob"], [250]),
    )
    for names, amounts in batches:
        stream.emit(cudf.DataFrame({"name": names, "amount": amounts}))

    assert assert_eq(output0[-1][0].reset_index(drop=True), _amounts([200, 250]))
    assert assert_eq(output0[-1][1].reset_index(drop=True), _amounts([450]))

    # Fresh stream, started from the first element of the last output above.
    resumed = Stream()
    example = cudf.DataFrame({"name": [], "amount": []})
    sdf = DataFrame(resumed, example=example)
    resumed_sum = sdf.rolling(
        2, with_state=True, start=output0[-1][0]
    ).amount.sum()
    output1 = resumed_sum.stream.gather().sink_to_list()

    resumed.emit(cudf.DataFrame({"name": ["Alice"], "amount": [50]}))

    assert assert_eq(output1[-1][0].reset_index(drop=True), _amounts([250, 50]))
    assert assert_eq(output1[-1][1].reset_index(drop=True), _amounts([300]))
def test_windowed_groupby_aggs_with_start_state(stream):
    """Windowed groupby-sum (cudf), restarted on a new stream given ``start``.

    The second pipeline passes ``output0[-1][0]`` from the first run as
    ``start``.
    """
    example = cudf.DataFrame({"name": [], "amount": []})
    sdf = DataFrame(stream, example=example)
    grouped_sum = (
        sdf.window(5, with_state=True, start=None).groupby(["name"]).amount.sum()
    )
    output0 = grouped_sum.stream.gather().sink_to_list()

    batches = (
        (["Alice", "Tom", "Linda"], [50, 100, 200]),
        (["Alice", "Linda", "Bob"], [250, 300, 350]),
    )
    for names, amounts in batches:
        stream.emit(cudf.DataFrame({"name": names, "amount": amounts}))

    # Fresh stream, started from the first element of the last output above.
    resumed = Stream()
    example = cudf.DataFrame({"name": [], "amount": []})
    sdf = DataFrame(resumed, example=example)
    resumed_sum = (
        sdf.window(5, with_state=True, start=output0[-1][0])
        .groupby(["name"])
        .amount.sum()
    )
    output1 = resumed_sum.stream.gather().sink_to_list()

    resumed.emit(
        cudf.DataFrame(
            {
                "name": ["Alice", "Linda", "Tom", "Bob"],
                "amount": [50, 100, 150, 200],
            }
        )
    )

    out_df1 = cudf.DataFrame(
        {
            "name": ["Alice", "Bob", "Linda", "Tom"],
            "amount": [50, 550, 100, 150],
        }
    )
    assert_eq(output1[-1][1].reset_index(), out_df1)
def test_buffer():
    """A ``buffer(10)`` absorbs ten fast emits; further emits are slowed."""
    source = Stream(asynchronous=True)
    buffered = source.map(inc).buffer(10)
    limited = buffered.map(inc).rate_limit(0.05)
    L = limited.sink_to_list()

    # First ten emits: expected to return quickly with nothing delivered yet.
    start = time()
    for i in range(10):
        yield source.emit(i)
    elapsed = time() - start

    assert elapsed < 0.01
    assert not L

    # Five more emits: expected to take noticeably longer.
    start = time()
    for i in range(5):
        yield source.emit(i)
    elapsed = time() - start

    assert L
    assert elapsed > 0.04
def test_timed_window():
    """``timed_window(0.01)`` groups elements into small time-based batches."""
    source = Stream(asynchronous=True)
    window = source.timed_window(0.01)
    assert window.loop is IOLoop.current()

    L = window.sink_to_list()

    for i in range(10):
        yield source.emit(i)
        yield gen.sleep(0.004)

    # Allow one more interval so the last batch is delivered.
    yield gen.sleep(window.interval)
    assert L
    assert sum(L, []) == list(range(10))

    batch_sizes = [len(x) for x in L]
    assert all(size <= 3 for size in batch_sizes)
    assert any(size >= 2 for size in batch_sizes)

    # With no further input, the most recent batch must be empty.
    yield gen.sleep(0.1)
    latest = L[-1]
    assert not latest
async def test_map_on_dict(c, s, a, b):
    """Mapping over dict elements after ``scatter`` behaves as for other data.

    dask treats dicts differently, so the streamz API must hide that from
    the user. Regression test against #336.
    """
    def add_to_dict(d):
        d["x"] = d["i"]
        return d

    source = Stream(asynchronous=True)
    scattered = source.scatter()
    futures = scattered.map(add_to_dict)
    L = futures.gather().sink_to_list()

    for i in range(5):
        await source.emit({"i": i})

    assert len(L) == 5

    # Compare in order of "x"; arrival order is not relied upon.
    ordered = sorted(L, key=lambda x: x["x"])
    for i, item in enumerate(ordered):
        assert item["x"] == i
        assert item["i"] == i
def test_rolling_aggs_with_start_state(stream):
    """Rolling sum (pandas) on one stream, then on a new stream given ``start``.

    The second pipeline passes ``output0[-1][0]`` from the first run as
    ``start``.
    """

    def _amounts(values):
        # Expected series for comparison; index is reset on the actual side.
        return pd.Series(values, name="amount")

    example = pd.DataFrame({'name': [], 'amount': []})
    sdf = DataFrame(stream, example=example)
    rolling_sum = sdf.rolling(2, with_state=True, start=()).amount.sum()
    output0 = rolling_sum.stream.gather().sink_to_list()

    batches = (
        (['Alice', 'Tom', 'Linda'], [50, 100, 200]),
        (['Bob'], [250]),
    )
    for names, amounts in batches:
        stream.emit(pd.DataFrame({'name': names, 'amount': amounts}))

    assert assert_eq(output0[-1][0].reset_index(drop=True), _amounts([200, 250]))
    assert assert_eq(output0[-1][1].reset_index(drop=True), _amounts([450.0]))

    # Fresh stream, started from the first element of the last output above.
    resumed = Stream()
    example = pd.DataFrame({'name': [], 'amount': []})
    sdf = DataFrame(resumed, example=example)
    resumed_sum = sdf.rolling(
        2, with_state=True, start=output0[-1][0]).amount.sum()
    output1 = resumed_sum.stream.gather().sink_to_list()

    resumed.emit(pd.DataFrame({'name': ['Alice'], 'amount': [50]}))

    assert assert_eq(output1[-1][0].reset_index(drop=True), _amounts([250, 50]))
    assert assert_eq(output1[-1][1].reset_index(drop=True), _amounts([300.0]))
async def model_flap(nursery_url, conn, partition_size=100):
    """Receive messages from ``nursery_url`` forever and run the model steps.

    Each message is JSON-decoded, flattened, mapped through
    ``flatten_record``, grouped into partitions of ``partition_size``, turned
    into a ``pd.DataFrame``, passed through ``build_game_table``,
    ``model_train`` and ``model_test`` (each bound to ``None, conn``), and
    the final result is printed.
    """
    with Sub0(dial=nursery_url) as sub:
        sub.subscribe(b"")

        stream = Stream(asynchronous=False)
        records = stream.map(ujson.loads).flatten().map(flatten_record)
        frames = records.partition(partition_size).map(pd.DataFrame)
        game_tables = frames.map(build_game_table)
        trained = game_tables.map(partial(model_train, None, conn))
        tested = trained.map(partial(model_test, None, conn))
        tested.sink(print)

        # Runs until cancelled or until receiving raises.
        while True:
            message = await sub.arecv()
            stream.emit(message)
def test_backpressure():
    """Emitting into a slowly drained bounded queue must slow the emitter."""
    q = Queue(maxsize=2)

    source = Stream(asynchronous=True)
    totals = source.map(inc).scan(add, start=0)
    totals.sink(q.put)

    @gen.coroutine
    def read_from_q():
        # Slow consumer: take one item, then pause, forever.
        while True:
            yield q.get()
            yield gen.sleep(0.1)

    IOLoop.current().add_callback(read_from_q)

    start = time()
    for i in range(5):
        yield source.emit(i)
    elapsed = time() - start

    assert elapsed >= 0.2
def test_partition_then_scatter_sync(loop):
    """Partition with a timeout ahead of scatter on a synchronous stream."""
    with cluster() as (s, [a, b]), \
            Client(s['address'], loop=loop) as client:  # noqa: F841
        start = time.monotonic()
        source = Stream()

        batches = source.partition(2, timeout=.1)
        bumped = batches.scatter().map(lambda x: [xx + 1 for xx in x])
        L = bumped.gather().flatten().sink_to_list()
        assert source.loop is client.loop

        rc = RefCounter()
        for i in range(3):
            source.emit(i, metadata=[{'ref': rc}])

        # Poll until the counter reaches zero, giving up two seconds after
        # ``start``.
        while True:
            if rc.count == 0 or time.monotonic() - start >= 2.:
                break
            time.sleep(1e-2)

        assert L == [1, 2, 3]
def test_windowed_groupby_aggs_with_start_state(stream):
    """Windowed groupby-sum (pandas), restarted on a new stream given ``start``.

    The second pipeline passes ``output0[-1][0]`` from the first run as
    ``start``.
    """
    example = pd.DataFrame({'name': [], 'amount': []})
    sdf = DataFrame(stream, example=example)
    grouped_sum = (
        sdf.window(5, with_state=True, start=None).groupby(['name']).amount.sum()
    )
    output0 = grouped_sum.stream.gather().sink_to_list()

    batches = (
        (['Alice', 'Tom', 'Linda'], [50, 100, 200]),
        (['Alice', 'Linda', 'Bob'], [250, 300, 350]),
    )
    for names, amounts in batches:
        stream.emit(pd.DataFrame({'name': names, 'amount': amounts}))

    # Fresh stream, started from the first element of the last output above.
    resumed = Stream()
    example = pd.DataFrame({'name': [], 'amount': []})
    sdf = DataFrame(resumed, example=example)
    resumed_sum = (
        sdf.window(5, with_state=True, start=output0[-1][0])
        .groupby(['name'])
        .amount.sum()
    )
    output1 = resumed_sum.stream.gather().sink_to_list()

    resumed.emit(pd.DataFrame({
        'name': ['Alice', 'Linda', 'Tom', 'Bob'],
        'amount': [50, 100, 150, 200],
    }))

    out_df1 = pd.DataFrame({
        'name': ['Alice', 'Bob', 'Linda', 'Tom'],
        'amount': [50.0, 550.0, 100.0, 150.0],
    })
    assert_eq(output1[-1][1].reset_index(), out_df1)
def test_timed_window_metadata():
    """Check the metadata emitted downstream of ``timed_window(0.01)``."""
    source = Stream()
    window = source.timed_window(0.01)
    L = metadata(window).sink_to_list()

    # First batch: element 0 is emitted without metadata.
    source.emit(0)
    source.emit(1, metadata=[{'v': 1}])
    yield gen.sleep(0.1)

    # Second batch: both elements carry metadata.
    for v in (2, 3):
        source.emit(v, metadata=[{'v': v}])
    yield gen.sleep(0.1)

    first = [{'v': 1}]             # first emit because 0 has no metadata
    second = [{'v': 2}, {'v': 3}]  # second emit
    assert L == [first, second]