def test_never_timeout(self):
    """Verify the session token keeps refreshing, so the session never expires.

    Waits past the server's token-rotation interval twice and checks that a
    new token was issued each time.
    """
    session = Session()
    for _ in range(2):
        token_before = session.session_token
        sleep(300)  # long enough for the server to rotate the token
        token_after = session.session_token
        self.assertNotEqual(token_before, token_after)
    session.close()
def run_script(dh_session: Session) -> Table:
    """Execute a script on the Deephaven server and fetch the table it builds.

    The script derives ``t2`` from a server-side table ``t`` (filter, sort,
    then first 5 rows per VendorID).  Returns a handle to ``t2``.
    """
    server_script = '''
t2 = t.where("VendorID > 0")\
    .sort("VendorID", "fare_amount")\
    .headBy(5, "VendorID")
'''
    dh_session.run_script(server_script)
    result = dh_session.open_table("t2")
    return result
def demo_asof_join(dh_session: Session):
    """As-of join two ticking time tables on their timestamps.

    Builds a fast (100 µs period) and a slow (200 µs period) time table,
    lets them tick for a couple of seconds, then joins the slow table's
    latest row at-or-before each fast-table timestamp.
    """
    fast = dh_session.time_table(period=100000).update(formulas=["Col1=i"])
    slow = dh_session.time_table(period=200000).update(formulas=["Col1=i"])
    # give both tables time to accumulate rows before joining
    time.sleep(2)
    return fast.aj(
        slow,
        on=["Timestamp"],
        joins=["Timestamp2 = Timestamp", "Col2 = Col1"])
def demo_asof_join(dh_session: Session):
    """As-of join two ticking time tables on their timestamps.

    Same demo as elsewhere but written against the client API that spells
    the keyword arguments ``column_specs`` / ``keys`` / ``columns_to_add``.
    """
    fast = dh_session.time_table(period=100000).update(
        column_specs=["Col1=i"])
    slow = dh_session.time_table(period=200000).update(
        column_specs=["Col1=i"])
    # give both tables time to accumulate rows before joining
    time.sleep(2)
    return fast.aj(
        slow,
        keys=["Timestamp"],
        columns_to_add=["Timestamp2 = Timestamp", "Col2 = Col1"])
def test_merge_tables(self):
    """Merging three derived tables yields more rows than any single input."""
    session = Session()
    arrow_table = csv.read_csv(self.csv_file)
    base = session.import_table(arrow_table)
    regrouped = base.group_by(by=["a", "c"]).ungroup(cols=["b", "d", "e"])
    filtered = base.where(["a % 2 > 0 && b % 3 == 1"])
    merged = session.merge_tables(
        tables=[base, regrouped, filtered], order_by="a")
    for source in (base, regrouped, filtered):
        self.assertTrue(merged.size > source.size)
def test_persistent_tables(self):
    """A table bound in one session stays visible to a later session.

    Fix: the original re-assigned ``session1 = Session()`` inside the
    ``with Session() as session1:`` block, discarding the context-managed
    session and leaking a second session that was never closed.  The
    replacement session also escaped the ``with`` cleanup entirely.
    """
    with Session() as session1:
        session1.run_script('t = None')
        t = session1.empty_table(10)
        session1.bind_table('t', t)

    with Session(sync_fields=SYNC_ONCE) as session2:
        # the binding made above should have persisted server-side
        self.assertIn('t', session2.tables)
def demo_query(dh_session: Session, taxi_data_table: Table) -> Table:
    """Build a server-side query (filter, sort, last-5-per-vendor) and run it."""
    query = dh_session.query(taxi_data_table)
    query = query.where(filters=["VendorID > 0"])
    query = query.sort(order_by=["VendorID", "fare_amount"])
    query = query.tail_by(num_rows=5, by=["VendorID"])
    # the query is assembled lazily; exec() ships it to the server
    return query.exec()
def import_taxi_records(dh_session: Session) -> Table:
    """Download NYC taxi data, prune columns, and upload it to Deephaven.

    Downloads the CSV, reads it into a pyarrow table, drops columns we don't
    want plus any column whose type Deephaven can't ingest, then imports the
    result into the given session.

    Improvement over the original: incompatible columns are collected first
    and dropped in a single ``drop`` call, instead of rebuilding the pyarrow
    table once per dropped column inside the loop.
    """
    csv_file_name = download_csv(
        url=
        "https://nyc-tlc.s3.amazonaws.com/trip+data/yellow_tripdata_2020-12.csv"
    )
    pa_table = csv.read_csv(csv_file_name)

    # drop unwanted columns
    unwanted_columns = [
        "tpep_pickup_datetime", "tpep_dropoff_datetime", "RatecodeID",
        "store_and_fwd_flag", "PULocationID", "DOLocationID"
    ]
    pa_table = pa_table.drop(unwanted_columns)

    # collect every column with an unsupported data type, then drop them all
    # at once (pa_table.drop returns a new table each call, so per-column
    # drops in a loop copy the table metadata repeatedly)
    incompatible_columns = []
    for column, column_name in zip(pa_table.columns, pa_table.column_names):
        if not is_deephaven_compatible(column.type):
            print(
                f"drop column: {column_name} because of unsupported data type {column.type}"
            )
            incompatible_columns.append(column_name)
    if incompatible_columns:
        pa_table = pa_table.drop(incompatible_columns)

    # upload the pyarrow table to the Deephaven server
    return dh_session.import_table(pa_table)
def main():
    """Import taxi data, compute the bottom-5 fares, and print a snapshot."""
    with Session(host="localhost", port=10000) as dh_session:
        taxi_data_table = import_taxi_records(dh_session)
        bottom_5_fares_table = demo_chained_table_ops(taxi_data_table)

        # pull the result to the client as a pyarrow table, then view it
        # as a Pandas DataFrame
        frame = bottom_5_fares_table.snapshot().to_pandas()
        pd.set_option("max_columns", 20)
        print(frame)
def test_shared_tables(self):
    """A table bound in one session syncs to another with SYNC_REPEATED.

    Fix: the original never closed ``session1`` or ``session2``, leaking
    both connections on every run (and on test failure).  Both sessions are
    now released via try/finally regardless of outcome.
    """
    session1 = Session(sync_fields=SYNC_REPEATED)
    try:
        session1.run_script('t = None')
        session2 = Session()
        try:
            t = session2.empty_table(10)
            session2.bind_table('t', t)

            @timeout_decorator.timeout(seconds=1)
            def wait_for_table():
                # busy-wait until the repeated field sync delivers 't'
                while 't' not in session1.tables:
                    pass

            try:
                wait_for_table()
            except timeout_decorator.TimeoutError:
                self.fail('table did not get synced to session1')
        finally:
            session2.close()
    finally:
        session1.close()
class BaseTestCase(unittest.TestCase):
    """Shared test fixture: one random CSV per class, one Session per test."""

    # path of the class-wide CSV fixture
    csv_file = 'test.csv'

    @classmethod
    def setUpClass(cls) -> None:
        """Create the random CSV fixture once, if it is not already present."""
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        if not os.path.exists(BaseTestCase.csv_file):
            # touch the file, then fill it with random data
            with open(BaseTestCase.csv_file, 'w'):
                pass
            make_random_csv(5, 1000, output_file=BaseTestCase.csv_file)

    @classmethod
    def tearDownClass(cls) -> None:
        """Remove the CSV fixture after the class finishes."""
        if os.path.exists(BaseTestCase.csv_file):
            os.remove(BaseTestCase.csv_file)

    def setUp(self) -> None:
        """Open a fresh Deephaven session for each test."""
        self.session = Session()

    def tearDown(self) -> None:
        """Release the per-test session."""
        self.session.close()
def main():
    """Import taxi data, bind it server-side as 't', run a script, print result.

    Fix: ``bind_table`` takes the variable name first and the table second
    (as the other call sites use it: ``bind_table('t', t)``); the original
    passed the arguments in reverse order.
    """
    with Session(host="localhost", port=10000) as dh_session:
        taxi_data_table = import_taxi_records(dh_session)

        # bind the table to a server-side variable so run_script can see it
        variable_name = "t"
        dh_session.bind_table(variable_name, taxi_data_table)

        bottom_5_fares_table = run_script(dh_session=dh_session)

        snapshot_data = bottom_5_fares_table.snapshot()
        df = snapshot_data.to_pandas()
        pd.set_option("max_columns", 20)
        print(df)
def main():
    """Compute top-5 and bottom-5 fares server-side, merge, and print them."""
    with Session(host="localhost", port=10000) as dh_session:
        taxi_data_table = import_taxi_records(dh_session)

        top_5 = demo_query(dh_session=dh_session,
                           taxi_data_table=taxi_data_table)
        bottom_5 = demo_chained_table_ops(taxi_data_table)
        combined = dh_session.merge_tables(tables=[top_5, bottom_5])

        # bring the merged result to the client and display it
        frame = combined.snapshot().to_pandas()
        pd.set_option("max_columns", 20)
        print(frame)
def test_multiple_sessions(self):
    """Many concurrent sessions can import, aggregate, and cross-join tables."""
    sessions = [Session() for _ in range(100)]

    imported_tables = []
    for session in sessions:
        arrow_table = csv.read_csv(self.csv_file)
        imported = session.import_table(arrow_table)
        # a no-key group_by collapses everything into a single row
        aggregated = imported.group_by()
        self.assertEqual(aggregated.size, 1)
        imported_tables.append(imported)

    # natural-join each table with its neighbor; same source data, so the
    # join must not change the row count
    for idx, table in enumerate(imported_tables[:-1]):
        joined = table.natural_join(imported_tables[idx + 1],
                                    on=["a", "b", "c", "d", "e"])
        self.assertEqual(table.size, joined.size)

    for session in sessions:
        session.close()
def setUp(self) -> None:
    """Open a fresh Deephaven session before each test."""
    self.session = Session()
def test_close(self):
    """After close(), the session reports neither connected nor alive."""
    session = Session()
    session.close()
    self.assertEqual(session.is_connected, False)
    self.assertEqual(session.is_alive, False)
def test_connect_failure(self):
    """Connecting to a port that is not a Deephaven server raises DHError.

    Fix: the original bound the (never-created) session to an unused local
    variable; the constructor call alone is what must raise.
    """
    with self.assertRaises(DHError):
        Session(port=80)
def main():
    """Run the as-of-join demo and print the joined table."""
    with Session(host="localhost", port=10000) as dh_session:
        joined = demo_asof_join(dh_session)
        frame = joined.snapshot().to_pandas()
        print(frame)
def test_empty_table(self):
    """empty_table(n) produces a table with exactly n rows."""
    session = Session()
    table = session.empty_table(1000)
    self.assertEqual(1000, table.size)
    session.close()
def test_time_table(self):
    """A time table ticks, so it must not report itself as static."""
    session = Session()
    ticking = session.time_table(period=100000)
    self.assertFalse(ticking.is_static)
    session.close()