def test_create_and_get_remote_table(context_id):
    nrclient = NodeRegistryClient()
    db = nrclient.get_db_by_node_id(local_node_id)
    local_node_monetdb_sock_address = f"{str(db.ip)}:{db.port}"

    table_schema = TableSchema(
        [
            ColumnInfo("col1", "int"),
            ColumnInfo("col2", "real"),
            ColumnInfo("col3", "text"),
        ]
    )
    table_name = local_node_create_table.delay(
        context_id=context_id,
        command_id=str(uuid.uuid1()).replace("-", ""),
        schema_json=table_schema.to_json(),
    ).get()
    table_info = TableInfo(table_name, table_schema)

    global_node_create_remote_table.delay(
        table_info_json=table_info.to_json(),
        monetdb_socket_address=local_node_monetdb_sock_address,
    ).get()
    remote_tables = global_node_get_remote_tables.delay(context_id=context_id).get()
    assert table_name in remote_tables
def create_two_column_table(context_id, table_id: int):
    table_schema = TableSchema(
        [ColumnInfo("col1", "int"), ColumnInfo("col2", "real")]
    )
    table_name = local_node_create_table.delay(
        context_id=f"{context_id}_table_{table_id}",
        command_id=str(uuid.uuid1()).replace("-", ""),
        schema_json=table_schema.to_json(),
    ).get()
    return table_name
def test_sql_injection_create_table_tableschema_name(context_id):
    with pytest.raises(ValueError):
        schema = TableSchema(
            [
                ColumnInfo("Robert'); DROP TABLE data; --", "int"),
                ColumnInfo("col2", "real"),
                ColumnInfo("col3", "text"),
            ]
        )
        json_schema = schema.to_json()
        local_node_create_table.delay(
            context_id=context_id,
            command_id=str(uuid.uuid1()).replace("-", ""),
            schema_json=json_schema,
        ).get()
def get_table_schema(table_name: str) -> TableSchema:
    """
    Retrieves the schema of a specific table from MonetDB.

    Parameters
    ----------
    table_name : str
        The name of the table

    Returns
    -------
    TableSchema
        The schema of the table.
    """
    schema = MonetDB().execute_and_fetchall(
        f"""
        SELECT columns.name, columns.type
        FROM columns
        RIGHT JOIN tables
        ON tables.id = columns.table_id
        WHERE
        tables.name = '{table_name}'
        AND
        tables.system = false
        """
    )
    if not schema:
        raise TablesNotFound([table_name])
    return TableSchema(
        [
            ColumnInfo(name, _convert_monet2mip_column_type(table_type))
            for name, table_type in schema
        ]
    )
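# Illustrative usage sketch (not part of the original module): it shows how
# get_table_schema can be used to read back and verify the schema of a table
# created earlier, assuming the table uses the three-column layout seen
# throughout these tests. The helper name below is hypothetical, and the table
# must already exist in the MonetDB instance this process is configured against.
def _assert_schema_roundtrip(table_name: str):
    expected_schema = TableSchema(
        [
            ColumnInfo("col1", "int"),
            ColumnInfo("col2", "real"),
            ColumnInfo("col3", "text"),
        ]
    )
    # get_table_schema queries MonetDB's system catalog directly.
    assert get_table_schema(table_name) == expected_schema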
def test_create_and_find_tables(context_id):
    table_schema = TableSchema(
        [
            ColumnInfo("col1", "int"),
            ColumnInfo("col2", "real"),
            ColumnInfo("col3", "text"),
        ]
    )

    table_1_name = local_node_create_table.delay(
        context_id=context_id,
        command_id=str(uuid.uuid4()).replace("-", ""),
        schema_json=table_schema.to_json(),
    ).get()
    tables = local_node_get_tables.delay(context_id=context_id).get()
    assert table_1_name in tables

    values = [[1, 0.1, "test1"], [2, 0.2, None], [3, 0.3, "test3"]]
    local_node_insert_data_to_table.delay(
        table_name=table_1_name, values=values
    ).get()
    table_data_json = local_node_get_table_data.delay(table_name=table_1_name).get()
    table_data = TableData.from_json(table_data_json)
    assert table_data.data == values
    assert table_data.schema == table_schema

    table_2_name = local_node_create_table.delay(
        context_id=context_id,
        command_id=str(uuid.uuid4()).replace("-", ""),
        schema_json=table_schema.to_json(),
    ).get()
    tables = local_node_get_tables.delay(context_id=context_id).get()
    assert table_2_name in tables

    values = [[1, 0.1, "test1"], [2, None, "None"], [3, 0.3, None]]
    local_node_insert_data_to_table.delay(
        table_name=table_2_name, values=values
    ).get()
    table_data_json = local_node_get_table_data.delay(table_name=table_2_name).get()
    table_data = TableData.from_json(table_data_json)
    assert table_data.data == values
    assert table_data.schema == table_schema

    table_schema_json = local_node_get_table_schema.delay(
        table_name=table_2_name
    ).get()
    table_schema_1 = TableSchema.from_json(table_schema_json)
    assert table_schema_1 == table_schema
def create_three_column_table_with_data(context_id, table_id: int):
    table_schema = TableSchema(
        [
            ColumnInfo("col1", "int"),
            ColumnInfo("col2", "real"),
            ColumnInfo("col3", "text"),
        ]
    )
    table_name = local_node_create_table.delay(
        context_id=f"{context_id}_table_{table_id}",
        command_id=str(uuid.uuid1()).replace("-", ""),
        schema_json=table_schema.to_json(),
    ).get()
    values = [[1, 0.1, "test1"], [2, 0.2, "test2"], [3, 0.3, "test3"]]
    local_node_insert_data_to_table.delay(table_name=table_name, values=values).get()
    return table_name
def test_create_view_and_get_view(context_id):
    table_schema = TableSchema(
        [
            ColumnInfo("col1", "int"),
            ColumnInfo("col2", "real"),
            ColumnInfo("col3", "text"),
        ]
    )
    table_name = local_node_create_table.delay(
        context_id=context_id,
        command_id=str(uuid.uuid4()).replace("-", ""),
        schema_json=table_schema.to_json(),
    ).get()
    values = [[1, 0.1, "test1"], [2, 0.2, None], [3, 0.3, "test3"]]
    local_node_insert_data_to_table.delay(table_name=table_name, values=values).get()

    columns = ["col1", "col3"]
    view_name = local_node_create_view.delay(
        context_id=context_id,
        command_id=str(uuid.uuid1()).replace("-", ""),
        table_name=table_name,
        columns=columns,
        filters_json="filters_json",
    ).get()
    views = local_node_get_views.delay(context_id=context_id).get()
    assert view_name in views

    view_intended_schema = TableSchema(
        [
            ColumnInfo("col1", "int"),
            ColumnInfo("col3", "text"),
        ]
    )
    schema_result_json = local_node_get_view_schema.delay(table_name=view_name).get()
    assert view_intended_schema == TableSchema.from_json(schema_result_json)

    view_data_json = local_node_get_view_data.delay(table_name=view_name).get()
    view_data = TableData.from_json(view_data_json)
    assert all(
        len(row) == len(view_intended_schema.columns) for row in view_data.data
    )
    assert view_data.schema == view_intended_schema
def test_create_pathology_view_and_get_view(context_id):
    columns = [
        "dataset",
        "age_value",
        "gcs_motor_response_scale",
        "pupil_reactivity_right_eye_result",
    ]
    datasets = ["dummy_tbi"]
    pathology = "tbi"
    view_name = local_node_create_pathology_view.delay(
        context_id=context_id,
        command_id=str(uuid.uuid1()).replace("-", ""),
        pathology=pathology,
        datasets=datasets,
        columns=columns,
        filters_json="filters_json",
    ).get()
    views = local_node_get_views.delay(context_id=context_id).get()
    assert view_name in views

    schema = TableSchema(
        [
            ColumnInfo("row_id", "int"),
            ColumnInfo("dataset", "text"),
            ColumnInfo("age_value", "int"),
            ColumnInfo("gcs_motor_response_scale", "text"),
            ColumnInfo("pupil_reactivity_right_eye_result", "text"),
        ]
    )
    schema_result_json = local_node_get_view_schema.delay(table_name=view_name).get()
    assert schema == TableSchema.from_json(schema_result_json)

    view_data_json = local_node_get_view_data.delay(table_name=view_name).get()
    view_data = TableData.from_json(view_data_json)
    assert all(len(row) == len(schema.columns) for row in view_data.data)
    assert view_data.schema == schema

    view_schema_json = local_node_get_view_schema.delay(table_name=view_name).get()
    view_schema = TableSchema.from_json(view_schema_json)
    assert view_schema == schema
def _convert_udf2udfgen_arg(udf_argument: UDFArgument):
    if udf_argument.type == "literal":
        return udf_argument.value
    elif udf_argument.type == "table":
        name = udf_argument.value
        schema = get_table_schema(name)
        udf_generator_schema = [
            ColumnInfo(column.name, column.data_type) for column in schema.columns
        ]
        return TableInfo(name, udf_generator_schema)
    else:
        raise ValueError(
            "A UDF argument must have one of the following types: 'literal', 'table'."
        )
def test_create_merge_table_with_remote_tables(context_id):
    local_node_1_data = node_catalog.get_node(local_node_1_id)
    local_node_2_data = node_catalog.get_node(local_node_2_id)
    schema = TableSchema(
        [
            ColumnInfo("col1", "int"),
            ColumnInfo("col2", "real"),
            ColumnInfo("col3", "text"),
        ]
    )

    # Create local tables
    local_node_1_table_name = local_node_1_create_table.delay(
        context_id=context_id,
        command_id=str(uuid.uuid1()).replace("-", ""),
        schema_json=schema.to_json(),
    ).get()
    local_node_2_table_name = local_node_2_create_table.delay(
        context_id=context_id,
        command_id=str(uuid.uuid1()).replace("-", ""),
        schema_json=schema.to_json(),
    ).get()

    # Insert data into the local tables
    values = [[1, 0.1, "test1"], [2, 0.2, "test2"], [3, 0.3, "test3"]]
    local_node_1_insert_data_to_table.delay(
        table_name=local_node_1_table_name, values=values
    ).get()
    local_node_2_insert_data_to_table.delay(
        table_name=local_node_2_table_name, values=values
    ).get()

    # Create remote tables on the global node, pointing at each local table
    table_info_local_1 = TableInfo(local_node_1_table_name, schema)
    table_info_local_2 = TableInfo(local_node_2_table_name, schema)
    local_node_1_monetdb_sock_address = (
        f"{local_node_1_data.monetdbIp}:{local_node_1_data.monetdbPort}"
    )
    local_node_2_monetdb_sock_address = (
        f"{local_node_2_data.monetdbIp}:{local_node_2_data.monetdbPort}"
    )
    global_node_create_remote_table.delay(
        table_info_json=table_info_local_1.to_json(),
        monetdb_socket_address=local_node_1_monetdb_sock_address,
    ).get()
    global_node_create_remote_table.delay(
        table_info_json=table_info_local_2.to_json(),
        monetdb_socket_address=local_node_2_monetdb_sock_address,
    ).get()
    remote_tables = global_node_get_remote_tables.delay(context_id=context_id).get()
    assert local_node_1_table_name in remote_tables
    assert local_node_2_table_name in remote_tables

    # Create the merge table
    merge_table_name = global_node_create_merge_table.delay(
        context_id=context_id,
        command_id=str(uuid.uuid1()).replace("-", ""),
        table_names=remote_tables,
    ).get()

    # Validate that the merge table exists
    merge_tables = global_node_get_merge_tables.delay(context_id=context_id).get()
    assert merge_table_name in merge_tables

    # Validate the merge table row count (3 rows from each of the 2 local tables)
    table_data_json = global_node_get_merge_table_data.delay(
        table_name=merge_table_name
    ).get()
    table_data = TableData.from_json(table_data_json)
    row_count = len(table_data.data)
    assert row_count == 6