def test_numpy_tablet_serialization():
    """Check that Tablet and NumpyTablet serialize the same data identically.

    The same four rows are given row-wise to ``Tablet`` and column-wise, as
    arrays with explicit ``>``-prefixed dtypes, to ``NumpyTablet``. The binary
    timestamps and binary values of both tablets must compare equal.
    """
    device = "root.sg_test_01.d_01"
    sensors = ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"]
    sensor_types = [
        TSDataType.BOOLEAN,
        TSDataType.INT32,
        TSDataType.INT64,
        TSDataType.FLOAT,
        TSDataType.DOUBLE,
        TSDataType.TEXT,
    ]

    # Row-oriented input for the plain Tablet.
    rows = [
        [False, 10, 11, 1.1, 10011.1, "test01"],
        [True, 100, 11111, 1.25, 101.0, "test02"],
        [False, 100, 1, 188.1, 688.25, "test03"],
        [True, 0, 0, 0, 6.25, "test04"],
    ]
    row_times = [16, 17, 18, 19]
    row_tablet = Tablet(device, sensors, sensor_types, rows, row_times)

    # Column-oriented input for the NumpyTablet (one array per measurement).
    columns = [
        np.array([False, True, False, True], dtype=">?"),
        np.array([10, 100, 100, 0], dtype=">i4"),
        np.array([11, 11111, 1, 0], dtype=">i8"),
        np.array([1.1, 1.25, 188.1, 0], dtype=">f4"),
        np.array([10011.1, 101.0, 688.25, 6.25], dtype=">f8"),
        np.array(["test01", "test02", "test03", "test04"]),
    ]
    column_times = np.array([16, 17, 18, 19], dtype=">i8")
    column_tablet = NumpyTablet(
        device, sensors, sensor_types, columns, column_times
    )

    assert (
        row_tablet.get_binary_timestamps()
        == column_tablet.get_binary_timestamps()
    )
    assert row_tablet.get_binary_values() == column_tablet.get_binary_values()
def test_simple_query():
    """Insert one random tablet and check that a query returns the same frame.

    100 rows of random data are generated for the six paths in the
    module-level ``ts_path_lst``, inserted as a single ``Tablet`` and read
    back with ``SELECT * FROM root.*``. The queried DataFrame, reordered to
    the input's column order, must equal the input frame (with a leading
    ``Time`` column).
    """
    with IoTDBContainer() as db:
        db: IoTDBContainer
        session = Session(db.get_container_host_ip(), db.get_exposed_port(6667))
        session.open(False)
        # Close the session even if an insert/query call raises, so a failing
        # test does not leave the connection open.
        try:
            create_ts(session)

            # insert data
            data_nums = 100
            timestamps = np.arange(data_nums)
            # One column per time series; dict order fixes the column order.
            data = {
                ts_path_lst[0]: np.float32(np.random.rand(data_nums)),
                ts_path_lst[1]: np.random.rand(data_nums),
                ts_path_lst[2]: np.random.randint(
                    10, 100, data_nums, dtype="int32"
                ),
                ts_path_lst[3]: np.random.randint(
                    10, 100, data_nums, dtype="int64"
                ),
                ts_path_lst[4]: np.random.choice([True, False], size=data_nums),
                ts_path_lst[5]: np.random.choice(
                    ["text1", "text2"], size=data_nums
                ),
            }
            df_input = pd.DataFrame(data)

            tablet = Tablet(
                device_id, measurements, data_type_lst, df_input.values, timestamps
            )
            session.insert_tablet(tablet)

            # The query result carries a "Time" column, so add it to the
            # expected frame only after the tablet values were extracted.
            df_input.insert(0, "Time", timestamps)

            session_data_set = session.execute_query_statement(
                "SELECT * FROM root.*"
            )
            df_output = session_data_set.todf()
            df_output = df_output[df_input.columns.tolist()]
        finally:
            session.close()

    assert_frame_equal(df_input, df_output)
def test_numpy_tablet_auto_correct_datatype():
    """Check NumpyTablet output when arrays are given with default dtypes.

    The numpy columns are created without an explicit dtype and in descending
    timestamp order. The test first asserts that the timestamp array and the
    INT32/INT64/FLOAT columns do not already have the dtype their
    ``TSDataType`` maps to, then asserts that the ``NumpyTablet`` still
    serializes to the same bytes as the equivalent row-based ``Tablet``.
    """
    device = "root.sg_test_01.d_01"
    measurements_ = ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"]
    data_types_ = [
        TSDataType.BOOLEAN,
        TSDataType.INT32,
        TSDataType.INT64,
        TSDataType.FLOAT,
        TSDataType.DOUBLE,
        TSDataType.TEXT,
    ]

    # Reference tablet: rows listed in ascending timestamp order.
    rows = [
        [True, 10000, 11111, 8.999, 776, "test05"],
        [True, 1000, 1111, 0, 6.25, "test06"],
        [False, 100, 111, 188.1, 688.25, "test07"],
        [False, 10, 11, 1.25, 101.0, "test08"],
        [False, 0, 1, 1.1, 10011.1, "test09"],
    ]
    row_times = [5, 6, 7, 8, 9]
    reference = Tablet(device, measurements_, data_types_, rows, row_times)

    # Same data column-wise, in descending timestamp order, default dtypes.
    np_values_unsorted = [
        np.array([False, False, False, True, True]),
        np.array([0, 10, 100, 1000, 10000]),
        np.array([1, 11, 111, 1111, 11111]),
        np.array([1.1, 1.25, 188.1, 0, 8.999]),
        np.array([10011.1, 101.0, 688.25, 6.25, 776]),
        np.array(["test09", "test08", "test07", "test06", "test05"]),
    ]
    np_timestamps_unsorted = np.array([9, 8, 7, 6, 5])

    # numpy.dtype of int and float should be little endian by default, so
    # none of these arrays is expected to match its target dtype yet.
    assert np_timestamps_unsorted.dtype != np.dtype(">i8")
    for column, ts_type in zip(np_values_unsorted[1:4], data_types_[1:4]):
        assert column.dtype != ts_type.np_dtype()

    candidate = NumpyTablet(
        device,
        measurements_,
        data_types_,
        np_values_unsorted,
        np_timestamps_unsorted,
    )

    assert reference.get_binary_timestamps() == candidate.get_binary_timestamps()
    assert reference.get_binary_values() == candidate.get_binary_values()
def test_sort_numpy_tablet():
    """Check NumpyTablet output when its input is in descending time order.

    The row-based ``Tablet`` receives rows with ascending timestamps 5..9,
    while the ``NumpyTablet`` receives the same data with timestamps 9..5.
    Both must produce equal binary timestamps and binary values.
    """
    device = "root.sg_test_01.d_01"
    sensors = ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"]
    sensor_types = [
        TSDataType.BOOLEAN,
        TSDataType.INT32,
        TSDataType.INT64,
        TSDataType.FLOAT,
        TSDataType.DOUBLE,
        TSDataType.TEXT,
    ]

    # Reference tablet: rows listed in ascending timestamp order.
    ordered_rows = [
        [True, 10000, 11111, 8.999, 776, "test05"],
        [True, 1000, 1111, 0, 6.25, "test06"],
        [False, 100, 111, 188.1, 688.25, "test07"],
        [False, 10, 11, 1.25, 101.0, "test08"],
        [False, 0, 1, 1.1, 10011.1, "test09"],
    ]
    ordered_times = [5, 6, 7, 8, 9]
    reference = Tablet(device, sensors, sensor_types, ordered_rows, ordered_times)

    # Same data column-wise with explicit dtypes, newest row first.
    reversed_columns = [
        np.array([False, False, False, True, True], dtype=">?"),
        np.array([0, 10, 100, 1000, 10000], dtype=">i4"),
        np.array([1, 11, 111, 1111, 11111], dtype=">i8"),
        np.array([1.1, 1.25, 188.1, 0, 8.999], dtype=">f4"),
        np.array([10011.1, 101.0, 688.25, 6.25, 776], dtype=">f8"),
        np.array(["test09", "test08", "test07", "test06", "test05"]),
    ]
    reversed_times = np.array([9, 8, 7, 6, 5], dtype=">i8")
    candidate = NumpyTablet(
        device,
        sensors,
        sensor_types,
        reversed_columns,
        reversed_times,
    )

    assert reference.get_binary_timestamps() == candidate.get_binary_timestamps()
    assert reference.get_binary_values() == candidate.get_binary_values()
def test_nullable_tablet_insertion():
    """Insert a tablet containing ``None`` cells and read it back.

    A four-row ``Tablet`` with several empty cells is inserted, then queried
    with an explicit column list. The resulting DataFrame must equal the
    input frame; dtypes are not compared (``check_dtype=False``).
    """
    with IoTDBContainer("iotdb:dev") as db:
        db: IoTDBContainer
        session = Session(db.get_container_host_ip(), db.get_exposed_port(6667))
        session.open(False)
        # Close the session even if an insert/query call raises, so a failing
        # test does not leave the connection open.
        try:
            measurements_ = ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"]
            data_types_ = [
                TSDataType.BOOLEAN,
                TSDataType.INT32,
                TSDataType.INT64,
                TSDataType.FLOAT,
                TSDataType.DOUBLE,
                TSDataType.TEXT,
            ]
            values_ = [
                [None, None, 11, 1.1, 10011.1, "test01"],
                [True, None, 11111, 1.25, 101.0, "test02"],
                [False, 100, 1, None, 688.25, "test03"],
                [True, None, 0, 0, 6.25, None],
            ]
            timestamps_ = [16, 17, 18, 19]
            tablet_ = Tablet(
                "root.sg_test_01.d_01",
                measurements_,
                data_types_,
                values_,
                timestamps_,
            )
            session.insert_tablet(tablet_)

            # Expected frame: full series paths as column names plus "Time".
            columns = [
                "root.sg_test_01.d_01." + measurement
                for measurement in measurements_
            ]
            df_input = pd.DataFrame(values_, columns=columns)
            df_input.insert(0, "Time", timestamps_)

            session_data_set = session.execute_query_statement(
                "select s_01, s_02, s_03, s_04, s_05, s_06 from root.sg_test_01.d_01"
            )
            df_output = session_data_set.todf()
            df_output = df_output[df_input.columns.tolist()]
        finally:
            session.close()

    # Pass check_dtype by keyword: the bare positional ``False`` was opaque
    # and depends on the parameter order of assert_frame_equal.
    assert_frame_equal(df_input, df_output, check_dtype=False)
# insert multiple records into database measurements_list_ = [["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"], ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"]] values_list_ = [[False, 22, 33, 4.4, 55.1, "test_records01"], [True, 77, 88, 1.25, 8.125, "test_records02"]] data_type_list_ = [data_types_, data_types_] device_ids_ = ["root.sg_test_01.d_01", "root.sg_test_01.d_01"] session.insert_records(device_ids_, [2, 3], measurements_list_, data_type_list_, values_list_) # insert one tablet into the database. values_ = [[False, 10, 11, 1.1, 10011.1, "test01"], [True, 100, 11111, 1.25, 101.0, "test02"], [False, 100, 1, 188.1, 688.25, "test03"], [True, 0, 0, 0, 6.25, "test04"]] # Non-ASCII text will cause error since bytes can only hold 0-128 nums. timestamps_ = [4, 5, 6, 7] tablet_ = Tablet("root.sg_test_01.d_01", measurements_, data_types_, values_, timestamps_) session.insert_tablet(tablet_) # insert multiple tablets into database tablet_01 = Tablet("root.sg_test_01.d_01", measurements_, data_types_, values_, [8, 9, 10, 11]) tablet_02 = Tablet("root.sg_test_01.d_01", measurements_, data_types_, values_, [12, 13, 14, 15]) session.insert_tablets([tablet_01, tablet_02]) # execute non-query sql statement session.execute_non_query_statement("insert into root.sg_test_01.d_01(timestamp, s_02) values(16, 188);") # execute sql query statement session_data_set = session.execute_query_statement("select * from root.sg_test_01.d_01") session_data_set.set_fetch_size(1024) while session_data_set.has_next(): print(session_data_set.next())
def test_session():
    """Exercise the main Session operations against a containerized IoTDB.

    Covers, in order: storage group creation/deletion, single and multi time
    series creation (with and without tags/attributes), time series deletion
    and existence checks, record / records / tablet / numpy tablet / tablets
    insertion, a tablet with empty cells, records of one device, a non-query
    SQL insert, and finally a query whose row count is verified.

    Failures of individual steps are reported through the module's
    ``test_fail`` and ``print_message`` helpers; the session is closed in a
    ``finally`` block so it is released even if a step raises.
    """
    with IoTDBContainer("iotdb:dev") as db:
        db: IoTDBContainer
        session = Session(db.get_container_host_ip(), db.get_exposed_port(6667))
        session.open(False)

        if not session.is_open():
            print("can't open session")
            exit(1)

        try:
            device_01 = "root.sg_test_01.d_01"

            # set and delete storage groups
            session.set_storage_group("root.sg_test_01")
            session.set_storage_group("root.sg_test_02")
            session.set_storage_group("root.sg_test_03")
            session.set_storage_group("root.sg_test_04")

            if session.delete_storage_group("root.sg_test_02") < 0:
                test_fail()
                print_message("delete storage group failed")

            if (
                session.delete_storage_groups(
                    ["root.sg_test_03", "root.sg_test_04"]
                )
                < 0
            ):
                test_fail()
                print_message("delete storage groups failed")

            # setting time series.
            session.create_time_series(
                "root.sg_test_01.d_01.s_01",
                TSDataType.BOOLEAN,
                TSEncoding.PLAIN,
                Compressor.SNAPPY,
            )
            session.create_time_series(
                "root.sg_test_01.d_01.s_02",
                TSDataType.INT32,
                TSEncoding.PLAIN,
                Compressor.SNAPPY,
            )
            session.create_time_series(
                "root.sg_test_01.d_01.s_03",
                TSDataType.INT64,
                TSEncoding.PLAIN,
                Compressor.SNAPPY,
            )
            session.create_time_series(
                "root.sg_test_01.d_02.s_01",
                TSDataType.BOOLEAN,
                TSEncoding.PLAIN,
                Compressor.SNAPPY,
                None,
                {"tag1": "v1"},
                {"description": "v1"},
                "temperature",
            )

            # setting multiple time series once.
            ts_path_lst_ = [
                "root.sg_test_01.d_01.s_04",
                "root.sg_test_01.d_01.s_05",
                "root.sg_test_01.d_01.s_06",
                "root.sg_test_01.d_01.s_07",
                "root.sg_test_01.d_01.s_08",
                "root.sg_test_01.d_01.s_09",
            ]
            data_type_lst_ = [
                TSDataType.FLOAT,
                TSDataType.DOUBLE,
                TSDataType.TEXT,
                TSDataType.FLOAT,
                TSDataType.DOUBLE,
                TSDataType.TEXT,
            ]
            encoding_lst_ = [TSEncoding.PLAIN] * len(data_type_lst_)
            compressor_lst_ = [Compressor.SNAPPY] * len(data_type_lst_)
            session.create_multi_time_series(
                ts_path_lst_, data_type_lst_, encoding_lst_, compressor_lst_
            )

            # same again for device d_02, this time with tags and attributes.
            ts_path_lst_ = [
                "root.sg_test_01.d_02.s_04",
                "root.sg_test_01.d_02.s_05",
                "root.sg_test_01.d_02.s_06",
                "root.sg_test_01.d_02.s_07",
                "root.sg_test_01.d_02.s_08",
                "root.sg_test_01.d_02.s_09",
            ]
            data_type_lst_ = [
                TSDataType.FLOAT,
                TSDataType.DOUBLE,
                TSDataType.TEXT,
                TSDataType.FLOAT,
                TSDataType.DOUBLE,
                TSDataType.TEXT,
            ]
            encoding_lst_ = [TSEncoding.PLAIN] * len(data_type_lst_)
            compressor_lst_ = [Compressor.SNAPPY] * len(data_type_lst_)
            # Comprehensions (not list multiplication) so every series gets
            # its own dict object.
            tags_lst_ = [{"tag2": "v2"} for _ in data_type_lst_]
            attributes_lst_ = [{"description": "v2"} for _ in data_type_lst_]
            session.create_multi_time_series(
                ts_path_lst_,
                data_type_lst_,
                encoding_lst_,
                compressor_lst_,
                None,
                tags_lst_,
                attributes_lst_,
                None,
            )

            # delete time series
            if (
                session.delete_time_series(
                    [
                        "root.sg_test_01.d_01.s_07",
                        "root.sg_test_01.d_01.s_08",
                        "root.sg_test_01.d_01.s_09",
                    ]
                )
                < 0
            ):
                test_fail()
                print_message("delete time series failed")

            # checking time series
            # s_07 expecting False
            if session.check_time_series_exists("root.sg_test_01.d_01.s_07"):
                test_fail()
                print_message("root.sg_test_01.d_01.s_07 shouldn't exist")
            # s_03 expecting True
            if not session.check_time_series_exists("root.sg_test_01.d_01.s_03"):
                test_fail()
                print_message("root.sg_test_01.d_01.s_03 should exist")
            # d_02.s_01 expecting True
            if not session.check_time_series_exists("root.sg_test_01.d_02.s_01"):
                test_fail()
                print_message("root.sg_test_01.d_02.s_01 should exist")
            # d_02.s_06 expecting True
            if not session.check_time_series_exists("root.sg_test_01.d_02.s_06"):
                test_fail()
                print_message("root.sg_test_01.d_02.s_06 should exist")

            # insert one record into the database.
            measurements_ = ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"]
            values_ = [False, 10, 11, 1.1, 10011.1, "test_record"]
            data_types_ = [
                TSDataType.BOOLEAN,
                TSDataType.INT32,
                TSDataType.INT64,
                TSDataType.FLOAT,
                TSDataType.DOUBLE,
                TSDataType.TEXT,
            ]
            if (
                session.insert_record(
                    device_01, 1, measurements_, data_types_, values_
                )
                < 0
            ):
                test_fail()
                print_message("insert record failed")

            # insert multiple records into database
            measurements_list_ = [
                ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"],
                ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"],
            ]
            values_list_ = [
                [False, 22, 33, 4.4, 55.1, "test_records01"],
                [True, 77, 88, 1.25, 8.125, "test_records02"],
            ]
            data_type_list_ = [data_types_, data_types_]
            device_ids_ = [device_01, device_01]
            if (
                session.insert_records(
                    device_ids_,
                    [2, 3],
                    measurements_list_,
                    data_type_list_,
                    values_list_,
                )
                < 0
            ):
                test_fail()
                print_message("insert records failed")

            # insert one tablet into the database.
            values_ = [
                [False, 10, 11, 1.1, 10011.1, "test01"],
                [True, 100, 11111, 1.25, 101.0, "test02"],
                [False, 100, 1, 188.1, 688.25, "test03"],
                [True, 0, 0, 0, 6.25, "test04"],
            ]  # Non-ASCII text will cause error since bytes can only hold 0-128 nums.
            timestamps_ = [4, 5, 6, 7]
            tablet_ = Tablet(
                device_01, measurements_, data_types_, values_, timestamps_
            )
            if session.insert_tablet(tablet_) < 0:
                test_fail()
                print_message("insert tablet failed")

            # insert one numpy tablet into the database.
            np_values_ = [
                np.array([False, True, False, True], np.dtype(">?")),
                np.array([10, 100, 100, 0], np.dtype(">i4")),
                np.array([11, 11111, 1, 0], np.dtype(">i8")),
                np.array([1.1, 1.25, 188.1, 0], np.dtype(">f4")),
                np.array([10011.1, 101.0, 688.25, 6.25], np.dtype(">f8")),
                np.array(["test01", "test02", "test03", "test04"]),
            ]
            np_timestamps_ = np.array([1, 2, 3, 4], np.dtype(">i8"))
            np_tablet_ = NumpyTablet(
                "root.sg_test_01.d_02",
                measurements_,
                data_types_,
                np_values_,
                np_timestamps_,
            )
            if session.insert_tablet(np_tablet_) < 0:
                test_fail()
                print_message("insert numpy tablet failed")

            # insert multiple tablets into database
            tablet_01 = Tablet(
                device_01, measurements_, data_types_, values_, [8, 9, 10, 11]
            )
            tablet_02 = Tablet(
                device_01,
                measurements_,
                data_types_,
                values_,
                [12, 13, 14, 15],
            )
            if session.insert_tablets([tablet_01, tablet_02]) < 0:
                test_fail()
                print_message("insert tablets failed")

            # insert one tablet with empty cells into the database.
            values_ = [
                [None, 10, 11, 1.1, 10011.1, "test01"],
                [True, None, 11111, 1.25, 101.0, "test02"],
                [False, 100, 1, None, 688.25, "test03"],
                [True, 0, 0, 0, None, None],
            ]  # Non-ASCII text will cause error since bytes can only hold 0-128 nums.
            timestamps_ = [20, 21, 22, 23]
            tablet_ = Tablet(
                device_01, measurements_, data_types_, values_, timestamps_
            )
            if session.insert_tablet(tablet_) < 0:
                test_fail()
                print_message("insert tablet with empty cells failed")

            # insert records of one device
            time_list = [1, 2, 3]
            measurements_list = [
                ["s_01", "s_02", "s_03"],
                ["s_01", "s_02", "s_03"],
                ["s_01", "s_02", "s_03"],
            ]
            data_types_list = [
                [TSDataType.BOOLEAN, TSDataType.INT32, TSDataType.INT64],
                [TSDataType.BOOLEAN, TSDataType.INT32, TSDataType.INT64],
                [TSDataType.BOOLEAN, TSDataType.INT32, TSDataType.INT64],
            ]
            values_list = [[False, 22, 33], [True, 1, 23], [False, 15, 26]]
            if (
                session.insert_records_of_one_device(
                    device_01,
                    time_list,
                    measurements_list,
                    data_types_list,
                    values_list,
                )
                < 0
            ):
                test_fail()
                print_message("insert records of one device failed")

            # execute non-query sql statement
            if (
                session.execute_non_query_statement(
                    "insert into root.sg_test_01.d_01(timestamp, s_02) values(16, 188)"
                )
                < 0
            ):
                test_fail()
                print_message(
                    "execute 'insert into root.sg_test_01.d_01(timestamp, s_02) values(16, 188)' failed"
                )

            # execute sql query statement
            session_data_set = session.execute_query_statement(
                "select * from root.sg_test_01.d_01"
            )
            session_data_set.set_fetch_size(1024)
            # d_01 was written at timestamps 1-16 and 20-23 above: 20 rows.
            expect_count = 20
            actual_count = 0
            while session_data_set.has_next():
                print(session_data_set.next())
                actual_count += 1
            session_data_set.close_operation_handle()

            if actual_count != expect_count:
                test_fail()
                print_message(
                    f"query count mismatch: expect count: {expect_count}"
                    f" actual count: {actual_count}"
                )
        finally:
            # close session connection.
            session.close()
def performance_test(
    measure_tstype_infos,
    data_file_name,
    use_new=True,
    check_result=False,
    row=10000,
    col=5000,
):
    """
    execute tablet insert using original or new methods.

    For each of ``col`` devices a tablet with ``row`` rows is built from the
    loaded csv data and inserted; load, construction and insert costs are
    printed at the end.

    :param measure_tstype_infos: key(str): measurement name, value(TSDataType): measurement data type
    :param data_file_name: the csv file name to insert
    :param use_new: True to build the tablet from numpy column arrays (new method),
        False to build it row by row from python lists (original method)
    :param check_result: True if check out the result (row count and content) after each insert
    :param row: tablet row number
    :param col: tablet column number (one device is written per column)
    """
    print(
        f"Test python: use new: {use_new}, row: {row}, col: {col}. measurements: {measure_tstype_infos}"
    )
    print(f"Total points: {len(measure_tstype_infos) * row * col}")

    # open the session and clean data
    session = create_open_session()
    # Close the session even if loading, inserting or validation raises.
    try:
        session.execute_non_query_statement("delete timeseries root.*")

        # test start
        st = time.perf_counter()
        csv_data = load_csv_data(measure_tstype_infos, data_file_name)
        load_cost = time.perf_counter() - st
        insert_cost = 0
        measurements = list(measure_tstype_infos.keys())
        data_types = list(measure_tstype_infos.values())
        for i in range(0, col):
            device_id = f"root.sg{i % 8}.{i}"
            # The construction code below is the workload being compared, so
            # it is deliberately kept as explicit loops.
            if not use_new:
                # Use the ORIGINAL method to construct tablet
                timestamps_ = []
                values = []
                for t in range(0, row):
                    timestamps_.append(csv_data.at[t, TIME_STR])
                    value_array = []
                    for m in measurements:
                        value_array.append(csv_data.at[t, m])
                    values.append(value_array)
            else:
                # Use the NEW method to construct tablet
                timestamps_ = csv_data[TIME_STR].values
                if timestamps_.dtype != FORMAT_CHAR_OF_TYPES[TSDataType.INT64]:
                    timestamps_ = timestamps_.astype(
                        FORMAT_CHAR_OF_TYPES[TSDataType.INT64]
                    )
                values = []
                for measure, tstype in measure_tstype_infos.items():
                    type_char = FORMAT_CHAR_OF_TYPES[tstype]
                    value_array = csv_data[measure].values
                    if value_array.dtype != type_char:
                        # TEXT columns held as object arrays are passed
                        # through unchanged; everything else is converted.
                        if not (
                            tstype == TSDataType.TEXT
                            and value_array.dtype == object
                        ):
                            value_array = value_array.astype(type_char)
                    values.append(value_array)

            tablet = Tablet(
                device_id,
                measurements,
                data_types,
                values,
                timestamps_,
                use_new=use_new,
            )
            cost_st = time.perf_counter()
            session.insert_tablet(tablet)
            insert_cost += time.perf_counter() - cost_st

            if check_result:
                check_count(row, session, f"select count(*) from {device_id}")
                # One expected line per row: time followed by each
                # measurement value, joined with two tabs.
                expect = [
                    "\t\t".join(
                        [str(csv_data.at[t, TIME_STR])]
                        + [str(csv_data.at[t, m]) for m in measurements]
                    )
                    for t in range(row)
                ]
                check_query_result(
                    expect,
                    session,
                    f"select {','.join(measurements)} from {device_id}",
                )
                print("query validation have passed")
        end = time.perf_counter()

        # clean data
        session.execute_non_query_statement("delete timeseries root.*")
    finally:
        session.close()

    print(f"load cost: {load_cost:.3f} s")
    # NOTE: computed as the remainder, so with check_result=True the
    # validation time is included in this figure as well.
    print(f"construct tablet cost: {end - st - insert_cost - load_cost:.3f} s")
    print(f"insert tablet cost: {insert_cost:.3f} s")
    print(f"total cost: {end - st:.3f} s")