def test_collection_multi_float_vectors(self): """ target: test collection with multi float vectors method: create collection with two float-vec fields expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) fields = [cf.gen_float_vec_field(), cf.gen_float_vec_field(name="tmp")] schema = cf.gen_collection_schema(fields=fields) self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.check_collection_property, check_items={exp_name: c_name, exp_schema: schema})
def test_collection_dup_name_new_dim(self): """ target: test collection with dup name and new dim schema method: 1. default schema 2. schema with new dim expected: raise exception """ self._connect() new_dim = 120 c_name = cf.gen_unique_str(prefix) collection_w = self.init_collection_wrap( name=c_name, check_task=CheckTasks.check_collection_property, check_items={ exp_name: c_name, exp_schema: default_schema }) schema = cf.gen_default_collection_schema() new_fields = cf.gen_float_vec_field(dim=new_dim) schema.fields[-1] = new_fields error = { ct.err_code: 1, ct.err_msg: "The collection already exist, but the schema isnot the same as the " "passed in" } self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) assert collection_w.primary_field is None
def init_multi_fields_collection_wrap(self, name=cf.gen_unique_str()): vec_fields = [cf.gen_float_vec_field(ct.another_float_vec_field_name)] schema = cf.gen_schema_multi_vector_fields(vec_fields) collection_w = self.init_collection_wrap(name=name, schema=schema) df = cf.gen_dataframe_multi_vec_fields(vec_fields=vec_fields) collection_w.insert(df) assert collection_w.num_entities == ct.default_nb return collection_w, df
def test_collection_vector_out_bounds_dim(self, dim): """ target: test collection with out of bounds dim method: invalid dim -1 and 32759 expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) float_vec_field = cf.gen_float_vec_field(dim=dim) schema = cf.gen_collection_schema(fields=[float_vec_field]) error = {ct.err_code: 0, ct.err_msg: "invalid dimension: {}. should be in range 1 ~ 32768".format(dim)} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)
def test_collection_mix_vectors(self): """ target: test collection with mix vectors method: create with float and binary vec expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) fields = [cf.gen_float_vec_field(), cf.gen_binary_vec_field()] schema = cf.gen_collection_schema(fields=fields) self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.check_collection_property, check_items={exp_name: c_name, exp_schema: schema})
def test_collection_field_float_type(self): """ target: test collection with float type method: create field with float type expected: """ self._connect() c_name = cf.gen_unique_str(prefix) field, _ = self.field_schema_wrap.init_field_schema(name=ct.default_int64_field_name, dtype=5.0) schema = cf.gen_collection_schema(fields=[field, cf.gen_float_vec_field()]) error = {ct.err_code: 0, ct.err_msg: "Field type must be of DataType"} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)
def test_collection_none_field_name(self): """ target: test field schema with None name method: None field name expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) field, _ = self.field_schema_wrap.init_field_schema(name=None, dtype=5) schema = cf.gen_collection_schema(fields=[field, cf.gen_float_vec_field()]) error = {ct.err_code: 1, ct.err_msg: "You should specify the name of field"} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)
def test_collection_vector_invalid_dim(self, get_invalid_dim): """ target: test collection with invalid dimension method: define float-vec field with invalid dimension expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) float_vec_field = cf.gen_float_vec_field(dim=get_invalid_dim) schema = cf.gen_collection_schema(fields=[float_vec_field]) error = {ct.err_code: 0, ct.err_msg: "dim must be of int"} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)
def test_collection_non_vector_field_dim(self): """ target: test collection with dim for non-vector field method: define int64 field with dim expected: no exception """ self._connect() c_name = cf.gen_unique_str(prefix) int_field, _ = self.field_schema_wrap.init_field_schema(name="int", dtype=DataType.INT64, dim=ct.default_dim) float_vec_field = cf.gen_float_vec_field() schema = cf.gen_collection_schema(fields=[int_field, float_vec_field]) self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.check_collection_property, check_items={exp_name: c_name, exp_schema: schema})
def test_collection_invalid_field_name(self, name): """ target: test collection with invalid field name method: invalid string name expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) field, _ = self.field_schema_wrap.init_field_schema(name=name, dtype=5) vec_field = cf.gen_float_vec_field() schema = cf.gen_collection_schema(fields=[field, vec_field]) error = {ct.err_code: 1, ct.err_msg: "Invalid field name"} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)
def test_collection_primary_inconsistent(self): """ target: test collection with different primary field setting method: 1. set A field is_primary 2. set primary_field is B expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) int_field = cf.gen_int64_field(name="int", is_primary=True) float_vec_field = cf.gen_float_vec_field(name="vec") schema = cf.gen_collection_schema(fields=[int_field, float_vec_field], primary_field="vec") error = {ct.err_code: 0, ct.err_msg: "there are more than one primary key"} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)
def test_collection_multi_primary_fields(self): """ target: test collection with multi primary method: collection with two primary fields expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) int_field = cf.gen_int64_field(is_primary=True) float_vec_field = cf.gen_float_vec_field(is_primary=True) schema = cf.gen_collection_schema(fields=[int_field, float_vec_field]) error = {ct.err_code: 0, ct.err_msg: "there are more than one primary key"} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)
def test_collection_unsupported_primary_field(self, get_unsupported_primary_field): """ target: test collection with unsupported primary field type method: specify non-int64 as primary field expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) field = get_unsupported_primary_field vec_field = cf.gen_float_vec_field(name="vec") schema = cf.gen_collection_schema(fields=[field, vec_field], primary_field=field.name) error = {ct.err_code: 1, ct.err_msg: "the data type of primary key should be int64"} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error)
def test_collection_field_primary_false(self): """ target: test collection with primary false method: define field with is_primary false expected: no exception """ self._connect() c_name = cf.gen_unique_str(prefix) int_field = cf.gen_int64_field(name="int") float_vec_field = cf.gen_float_vec_field() schema = cf.gen_collection_schema(fields=[int_field, float_vec_field]) self.collection_wrap.init_collection(c_name, schema=schema) assert self.collection_wrap.primary_field is None assert self.collection_wrap.schema.auto_id
def test_collection_dup_name_new_dim(self): """ target: test collection with dup name and new dim schema method: 1. default schema 2. schema with new dim expected: raise exception """ self._connect() new_dim = 120 c_name, collection = self._collection() assert_default_collection(collection, c_name) schema = cf.gen_default_collection_schema() new_fields = cf.gen_float_vec_field(dim=new_dim) schema.fields[-1] = new_fields ex, _ = self.collection.collection_init(c_name, schema=schema) assert "The collection already exist, but the schema isnot the same as the passed in" in str( ex) assert collection.primary_field is None
def test_query_expr_non_primary_field(self): """ target: test query on non-primary field method: query on non-primary int field expected: raise exception """ fields = [ cf.gen_int64_field(), cf.gen_int64_field(name='int2', is_primary=True), cf.gen_float_vec_field() ] schema = cf.gen_collection_schema(fields) collection_w = self.init_collection_wrap( name=cf.gen_unique_str(prefix), schema=schema) nb = 100 data = [[i for i in range(nb)], [i for i in range(nb)], cf.gen_vectors(nb, ct.default_dim)] collection_w.insert(data) assert collection_w.num_entities == nb assert collection_w.primary_field.name == 'int2' error = {ct.err_code: 1, ct.err_msg: "column is not primary key"} collection_w.query(default_term_expr, check_task=CheckTasks.err_res, check_items=error)
class TestInsertOperation(TestcaseBase): """ ****************************************************************** The following cases are used to test insert interface operations ****************************************************************** """ @pytest.fixture(scope="function", params=[8, 4096]) def dim(self, request): yield request.param @pytest.mark.tags(CaseLabel.L1) def test_insert_without_connection(self): """ target: test insert without connection method: insert after remove connection expected: raise exception """ c_name = cf.gen_unique_str(prefix) collection_w = self.init_collection_wrap(name=c_name) self.connection_wrap.remove_connection(ct.default_alias) res_list, _ = self.connection_wrap.list_connections() assert ct.default_alias not in res_list data = cf.gen_default_list_data(10) error = {ct.err_code: 0, ct.err_msg: 'should create connect first'} collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.parametrize("vec_fields", [[cf.gen_float_vec_field(name="float_vector1")], [cf.gen_binary_vec_field()], [cf.gen_binary_vec_field(), cf.gen_binary_vec_field("binary_vec")]]) def test_insert_multi_float_vec_fields(self, vec_fields): """ target: test insert into multi float vec fields collection method: create collection with different schema and insert expected: verify num entities """ schema = cf.gen_schema_multi_vector_fields(vec_fields) collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), schema=schema) df = cf.gen_dataframe_multi_vec_fields(vec_fields=vec_fields) collection_w.insert(df) assert collection_w.num_entities == ct.default_nb @pytest.mark.tags(CaseLabel.L1) def test_insert_drop_collection(self): """ target: test insert and drop method: insert data and drop collection expected: verify collection if exist """ c_name = cf.gen_unique_str(prefix) collection_w = self.init_collection_wrap(name=c_name) collection_list, _ = self.utility_wrap.list_collections() assert collection_w.name in collection_list df = cf.gen_default_dataframe_data(ct.default_nb) collection_w.insert(data=df) collection_w.drop() collection_list, _ = self.utility_wrap.list_collections() assert collection_w.name not in collection_list @pytest.mark.tags(CaseLabel.L1) def test_insert_create_index(self): """ target: test insert and create index method: 1. insert 2. create index expected: verify num entities and index """ collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix)) df = cf.gen_default_dataframe_data(ct.default_nb) collection_w.insert(data=df) assert collection_w.num_entities == ct.default_nb collection_w.create_index(ct.default_float_vec_field_name, default_index_params) assert collection_w.has_index()[0] index, _ = collection_w.index() assert index == Index(collection_w.collection, ct.default_float_vec_field_name, default_index_params) assert collection_w.indexes[0] == index @pytest.mark.tags(CaseLabel.L1) def test_insert_after_create_index(self): """ target: test insert after create index method: 1. create index 2. insert data expected: verify index and num entities """ collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix)) collection_w.create_index(ct.default_float_vec_field_name, default_index_params) assert collection_w.has_index()[0] index, _ = collection_w.index() assert index == Index(collection_w.collection, ct.default_float_vec_field_name, default_index_params) assert collection_w.indexes[0] == index df = cf.gen_default_dataframe_data(ct.default_nb) collection_w.insert(data=df) assert collection_w.num_entities == ct.default_nb @pytest.mark.tags(CaseLabel.L2) def test_insert_binary_after_index(self): """ target: test insert binary after index method: 1.create index 2.insert binary data expected: 1.index ok 2.num entities correct """ schema = cf.gen_default_binary_collection_schema() collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), schema=schema) collection_w.create_index(ct.default_binary_vec_field_name, default_binary_index_params) assert collection_w.has_index()[0] index, _ = collection_w.index() assert index == Index(collection_w.collection, ct.default_binary_vec_field_name, default_binary_index_params) assert collection_w.indexes[0] == index df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb) collection_w.insert(data=df) assert collection_w.num_entities == ct.default_nb @pytest.mark.tags(CaseLabel.L1) def test_insert_auto_id_create_index(self): """ target: test create index in auto_id=True collection method: 1.create auto_id=True collection and insert 2.create index expected: index correct """ schema = cf.gen_default_collection_schema(auto_id=True) collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), schema=schema) df = cf.gen_default_dataframe_data(ct.default_nb) df.drop(ct.default_int64_field_name, axis=1, inplace=True) mutation_res, _ = collection_w.insert(data=df) assert cf._check_primary_keys(mutation_res.primary_keys, ct.default_nb) assert collection_w.num_entities == ct.default_nb # create index collection_w.create_index(ct.default_float_vec_field_name, default_index_params) assert collection_w.has_index()[0] index, _ = collection_w.index() assert index == Index(collection_w.collection, ct.default_float_vec_field_name, default_index_params) assert collection_w.indexes[0] == index @pytest.mark.tags(CaseLabel.L1) def test_insert_auto_id_true(self): """ target: test insert ids fields values when auto_id=True method: 1.create collection with auto_id=True 2.insert without ids expected: verify primary_keys and num_entities """ c_name = cf.gen_unique_str(prefix) schema = cf.gen_default_collection_schema(auto_id=True) collection_w = self.init_collection_wrap(name=c_name, schema=schema) df = cf.gen_default_dataframe_data(ct.default_nb) df.drop(ct.default_int64_field_name, axis=1, inplace=True) mutation_res, _ = collection_w.insert(data=df) assert cf._check_primary_keys(mutation_res.primary_keys, ct.default_nb) assert collection_w.num_entities == ct.default_nb @pytest.mark.tags(CaseLabel.L1) def test_insert_twice_auto_id_true(self): """ target: test insert ids fields twice when auto_id=True method: 1.create collection with auto_id=True 2.insert twice expected: verify primary_keys unique """ c_name = cf.gen_unique_str(prefix) schema = cf.gen_default_collection_schema(auto_id=True) nb = 10 collection_w = self.init_collection_wrap(name=c_name, schema=schema) df = cf.gen_default_dataframe_data(nb) df.drop(ct.default_int64_field_name, axis=1, inplace=True) mutation_res, _ = collection_w.insert(data=df) primary_keys = mutation_res.primary_keys assert cf._check_primary_keys(primary_keys, nb) mutation_res_1, _ = collection_w.insert(data=df) primary_keys.extend(mutation_res_1.primary_keys) assert cf._check_primary_keys(primary_keys, nb * 2) assert collection_w.num_entities == nb * 2 @pytest.mark.tags(CaseLabel.L1) def test_insert_auto_id_true_list_data(self): """ target: test insert ids fields values when auto_id=True method: 1.create collection with auto_id=True 2.insert list data with ids field values expected: assert num entities """ c_name = cf.gen_unique_str(prefix) schema = cf.gen_default_collection_schema(auto_id=True) collection_w = self.init_collection_wrap(name=c_name, schema=schema) data = cf.gen_default_list_data(nb=ct.default_nb) mutation_res, _ = collection_w.insert(data=data[1:]) assert mutation_res.insert_count == ct.default_nb assert cf._check_primary_keys(mutation_res.primary_keys, ct.default_nb) assert collection_w.num_entities == ct.default_nb @pytest.mark.tags(CaseLabel.L1) def test_insert_auto_id_true_with_dataframe_values(self): """ target: test insert with auto_id=True method: create collection with auto_id=True expected: 1.verify num entities 2.verify ids """ c_name = cf.gen_unique_str(prefix) schema = cf.gen_default_collection_schema(auto_id=True) collection_w = self.init_collection_wrap(name=c_name, schema=schema) df = cf.gen_default_dataframe_data(nb=100) error = {ct.err_code: 0, ct.err_msg: 'Auto_id is True, primary field should not have data'} collection_w.insert(data=df, check_task=CheckTasks.err_res, check_items=error) assert collection_w.is_empty @pytest.mark.tags(CaseLabel.L1) def test_insert_auto_id_true_with_list_values(self): """ target: test insert with auto_id=True method: create collection with auto_id=True expected: 1.verify num entities 2.verify ids """ c_name = cf.gen_unique_str(prefix) schema = cf.gen_default_collection_schema(auto_id=True) collection_w = self.init_collection_wrap(name=c_name, schema=schema) data = cf.gen_default_list_data(nb=100) error = {ct.err_code: 0, ct.err_msg: 'The data fields number is not match with schema'} collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error) assert collection_w.is_empty @pytest.mark.tags(CaseLabel.L1) def test_insert_auto_id_false_same_values(self): """ target: test insert same ids with auto_id false method: 1.create collection with auto_id=False 2.insert same int64 field values expected: raise exception """ c_name = cf.gen_unique_str(prefix) collection_w = self.init_collection_wrap(name=c_name) nb = 100 data = cf.gen_default_list_data(nb=nb) data[0] = [1 for i in range(nb)] mutation_res, _ = collection_w.insert(data) assert mutation_res.insert_count == nb assert mutation_res.primary_keys == data[0] @pytest.mark.tags(CaseLabel.L1) def test_insert_auto_id_false_negative_values(self): """ target: test insert negative ids with auto_id false method: auto_id=False, primary field values is negative expected: verify num entities """ c_name = cf.gen_unique_str(prefix) collection_w = self.init_collection_wrap(name=c_name) nb = 100 data = cf.gen_default_list_data(nb) data[0] = [i for i in range(0, -nb, -1)] mutation_res, _ = collection_w.insert(data) assert mutation_res.primary_keys == data[0] assert collection_w.num_entities == nb @pytest.mark.tags(CaseLabel.L2) def test_insert_multi_threading(self): """ target: test concurrent insert method: multi threads insert expected: verify num entities """ collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix)) df = cf.gen_default_dataframe_data(ct.default_nb) thread_num = 4 threads = [] primary_keys = df[ct.default_int64_field_name].values.tolist() def insert(thread_i): log.debug(f'In thread-{thread_i}') mutation_res, _ = collection_w.insert(df) assert mutation_res.insert_count == ct.default_nb assert mutation_res.primary_keys == primary_keys for i in range(thread_num): x = threading.Thread(target=insert, args=(i,)) threads.append(x) x.start() for t in threads: t.join() assert collection_w.num_entities == ct.default_nb * thread_num @pytest.mark.tags(CaseLabel.L2) @pytest.mark.skip(reason="Currently primary keys are not unique") def test_insert_multi_threading_auto_id(self): """ target: test concurrent insert auto_id=True collection method: 1.create auto_id=True collection 2.concurrent insert expected: verify primary keys unique """ pass @pytest.mark.tags(CaseLabel.L2) def test_insert_multi_times(self, dim): """ target: test insert multi times method: insert data multi times expected: verify num entities """ step = 120 nb = 12000 collection_w = self.init_collection_general(prefix, False, dim=dim)[0] for _ in range(nb // step): df = cf.gen_default_dataframe_data(step, dim) mutation_res, _ = collection_w.insert(data=df) assert mutation_res.insert_count == step assert mutation_res.primary_keys == df[ct.default_int64_field_name].values.tolist() assert collection_w.num_entities == nb @pytest.mark.tags(CaseLabel.L2) def test_insert_all_datatype_collection(self): """ target: test insert into collection that contains all datatype fields method: 1.create all datatype collection 2.insert data expected: verify num entities """ self._connect() nb = 100 df = cf.gen_dataframe_all_data_type(nb=nb) self.collection_wrap.construct_from_dataframe(cf.gen_unique_str(prefix), df, primary_field=ct.default_int64_field_name) assert self.collection_wrap.num_entities == nb
class TestCollectionParams(TestcaseBase): """ Test case of collection interface """ @pytest.fixture(scope="function", params=ct.get_invalid_strs) def get_invalid_type_schema(self, request): if request.param is None: pytest.skip("None schema is valid") yield request.param @pytest.fixture(scope="function", params=ct.get_invalid_strs) def get_invalid_type_fields(self, request): if isinstance(request.param, list): pytest.skip("list is valid fields") yield request.param @pytest.fixture(scope="function", params=cf.gen_all_type_fields()) def get_unsupported_primary_field(self, request): if request.param.dtype == DataType.INT64: pytest.skip("int64 type is valid primary key") yield request.param @pytest.fixture(scope="function", params=ct.get_invalid_strs) def get_invalid_dim(self, request): if request.param == 1: request.param = 0 yield request.param @pytest.mark.tags(CaseLabel.L0) def test_collection(self): """ target: test collection with default schema method: create collection with default schema expected: assert collection property """ self._connect() c_name = cf.gen_unique_str(prefix) self.collection_wrap.init_collection( c_name, schema=default_schema, check_task=CheckTasks.check_collection_property, check_items={ exp_name: c_name, exp_schema: default_schema }) assert c_name, _ in self.utility_wrap.list_collections() @pytest.mark.tags(CaseLabel.L0) def test_collection_empty_name(self): """ target: test collection with empty name method: create collection with a empty name expected: raise exception """ self._connect() c_name = "" error = {ct.err_code: 1, ct.err_msg: "value is illegal"} self.collection_wrap.init_collection(c_name, schema=default_schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("name", [[], 1, [1, "2", 3], (1, ), {1: 1}, None]) def test_collection_illegal_name(self, name): """ target: test collection with illegal name method: create collection with illegal name expected: raise exception """ self._connect() error = { ct.err_code: 1, ct.err_msg: "`collection_name` value {} is illegal".format(name) } self.collection_wrap.init_collection(name, schema=default_schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("name", [ "12-s", "12 s", "(mn)", "中文", "%$#", "a".join("a" for i in range(256)) ]) def test_collection_invalid_name(self, name): """ target: test collection with invalid name method: create collection with invalid name expected: raise exception """ self._connect() error = { ct.err_code: 1, ct.err_msg: "Invalid collection name: {}".format(name) } self.collection_wrap.init_collection(name, schema=default_schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L0) def test_collection_dup_name(self): """ target: test collection with dup name method: create collection with dup name and none schema and data expected: collection properties consistent """ self._connect() c_name = cf.gen_unique_str(prefix) collection_w = self.init_collection_wrap( name=c_name, check_task=CheckTasks.check_collection_property, check_items={ exp_name: c_name, exp_schema: default_schema }) self.collection_wrap.init_collection(collection_w.name) assert collection_w.name == self.collection_wrap.name assert collection_w.schema == self.collection_wrap.schema assert collection_w.num_entities == self.collection_wrap.num_entities assert collection_w.name, _ in self.utility_wrap.list_collections()[0] @pytest.mark.tags(CaseLabel.L1) def test_collection_dup_name_with_desc(self): """ target: test collection with dup name method: 1. default schema with desc 2. dup name collection expected: desc consistent """ self._connect() c_name = cf.gen_unique_str(prefix) schema = cf.gen_default_collection_schema( description=ct.collection_desc) collection_w = self.init_collection_wrap( name=c_name, schema=schema, check_task=CheckTasks.check_collection_property, check_items={ exp_name: c_name, exp_schema: schema }) self.collection_wrap.init_collection( c_name, check_task=CheckTasks.check_collection_property, check_items={ exp_name: c_name, exp_schema: schema }) assert collection_w.description == self.collection_wrap.description @pytest.mark.tags(CaseLabel.L1) def test_collection_dup_name_new_schema(self): """ target: test collection with dup name and new schema method: 1.create collection with default schema 2. collection with dup name and new schema expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) self.init_collection_wrap( name=c_name, check_task=CheckTasks.check_collection_property, check_items={ exp_name: c_name, exp_schema: default_schema }) fields = [cf.gen_int64_field()] schema = cf.gen_collection_schema(fields=fields) error = { ct.err_code: 1, ct.err_msg: "The collection already exist, but the schema isnot the same as the " "passed in" } self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_collection_dup_name_new_primary(self): """ target: test collection with dup name and new primary_field schema method: 1.collection with default schema 2. collection with same fields and new primary_field schema expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) collection_w = self.init_collection_wrap( name=c_name, check_task=CheckTasks.check_collection_property, check_items={ exp_name: c_name, exp_schema: default_schema }) schema = cf.gen_default_collection_schema( primary_field=ct.default_int64_field_name) error = { ct.err_code: 1, ct.err_msg: "The collection already exist, but the schema isnot the same as the " "passed in" } self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) assert collection_w.primary_field is None @pytest.mark.tags(CaseLabel.L1) def test_collection_dup_name_new_dim(self): """ target: test collection with dup name and new dim schema method: 1. default schema 2. schema with new dim expected: raise exception """ self._connect() new_dim = 120 c_name = cf.gen_unique_str(prefix) collection_w = self.init_collection_wrap( name=c_name, check_task=CheckTasks.check_collection_property, check_items={ exp_name: c_name, exp_schema: default_schema }) schema = cf.gen_default_collection_schema() new_fields = cf.gen_float_vec_field(dim=new_dim) schema.fields[-1] = new_fields error = { ct.err_code: 1, ct.err_msg: "The collection already exist, but the schema isnot the same as the " "passed in" } self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) assert collection_w.primary_field is None @pytest.mark.tags(CaseLabel.L1) def test_collection_dup_name_invalid_schema_type(self, get_invalid_type_schema): """ target: test collection with dup name and invalid schema method: 1. default schema 2. invalid schema expected: raise exception and """ self._connect() c_name = cf.gen_unique_str(prefix) collection_w = self.init_collection_wrap( name=c_name, check_task=CheckTasks.check_collection_property, check_items={ exp_name: c_name, exp_schema: default_schema }) error = { ct.err_code: 1, ct.err_msg: "schema type must be schema.CollectionSchema" } schema = get_invalid_type_schema self.collection_wrap.init_collection(collection_w.name, schema=schema, check_task=CheckTasks.err_res, check_items=error) assert collection_w.name == c_name @pytest.mark.tags(CaseLabel.L1) def test_collection_dup_name_same_schema(self): """ target: test collection with dup name and same schema method: dup name and same schema expected: two collection object is available """ self._connect() c_name = cf.gen_unique_str(prefix) collection_w = self.init_collection_wrap( name=c_name, schema=default_schema, check_task=CheckTasks.check_collection_property, check_items={ exp_name: c_name, exp_schema: default_schema }) self.collection_wrap.init_collection( name=c_name, schema=default_schema, check_task=CheckTasks.check_collection_property, check_items={ exp_name: c_name, exp_schema: default_schema }) assert collection_w.name == self.collection_wrap.name @pytest.mark.tags(CaseLabel.L0) def test_collection_none_schema(self): """ target: test collection with none schema method: create collection with none schema expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) error = {ct.err_code: 1, ct.err_msg: "Collection missing schema"} self.collection_wrap.init_collection(c_name, schema=None, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L0) def test_collection_invalid_type_schema(self, get_invalid_type_schema): """ target: test collection with invalid schema method: create collection with non-CollectionSchema type schema expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) error = { ct.err_code: 1, ct.err_msg: "schema type must be schema.CollectionSchema" } self.collection_wrap.init_collection(c_name, schema=get_invalid_type_schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_collection_invalid_type_fields(self, get_invalid_type_fields): """ target: test collection with invalid fields type, non-list method: create collection schema with non-list invalid fields expected: exception """ self._connect() fields = get_invalid_type_fields error = { ct.err_code: 1, ct.err_msg: "The fields of schema must be type list" } self.collection_schema_wrap.init_collection_schema( fields=fields, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_collection_with_unknown_type(self): """ target: test collection with unknown type method: create with DataType.UNKNOWN expected: raise exception """ self._connect() error = { ct.err_code: 0, ct.err_msg: "Field type not support <DataType.UNKNOWN: 999" } self.field_schema_wrap.init_field_schema(name="unknown", dtype=DataType.UNKNOWN, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("name", [[], 1, (1, ), {1: 1}, "12-s"]) def test_collection_invalid_type_field(self, name): """ target: test collection with invalid field name method: invalid string name expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) field, _ = self.field_schema_wrap.init_field_schema(name=name, dtype=5) vec_field = cf.gen_float_vec_field() schema = cf.gen_collection_schema(fields=[field, vec_field]) error = {ct.err_code: 1, ct.err_msg: "expected one of: bytes, unicode"} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("name", [ "12-s", "12 s", "(mn)", "中文", "%$#", "a".join("a" for i in range(256)) ]) def test_collection_invalid_field_name(self, name): """ target: test collection with invalid field name method: invalid string name expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) field, _ = self.field_schema_wrap.init_field_schema(name=name, dtype=5) vec_field = cf.gen_float_vec_field() schema = cf.gen_collection_schema(fields=[field, vec_field]) error = {ct.err_code: 1, ct.err_msg: "Invalid field name"} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_collection_none_field_name(self): """ target: test field schema with None name method: None field name expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) field, _ = self.field_schema_wrap.init_field_schema(name=None, dtype=5) schema = cf.gen_collection_schema( fields=[field, cf.gen_float_vec_field()]) error = { ct.err_code: 1, ct.err_msg: "You should specify the name of field" } self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("dtype", [6, [[]], {}, (), "", "a"]) def test_collection_invalid_field_type(self, dtype): """ target: test collection with invalid field type method: invalid DataType expected: raise exception """ self._connect() error = {ct.err_code: 0, ct.err_msg: "Field type must be of DataType"} self.field_schema_wrap.init_field_schema(name="test", dtype=dtype, check_task=CheckTasks.err_res, check_items=error) def test_collection_field_float_type(self): """ target: test collection with float type method: create field with float type expected: """ self._connect() c_name = cf.gen_unique_str(prefix) field, _ = self.field_schema_wrap.init_field_schema( name=ct.default_int64_field_name, dtype=5.0) schema = cf.gen_collection_schema( fields=[field, cf.gen_float_vec_field()]) error = {ct.err_code: 0, ct.err_msg: "Field type must be of DataType"} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L0) def test_collection_empty_fields(self): """ target: test collection with empty fields method: create collection with fields = [] expected: exception """ self._connect() c_name = cf.gen_unique_str(prefix) schema = cf.gen_collection_schema(fields=[]) error = { ct.err_code: 0, ct.err_msg: "The field of the schema cannot be empty" } self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_collection_dup_field(self): """ target: test collection with dup field name method: Two FieldSchema have same name expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) field_one = cf.gen_int64_field() field_two = cf.gen_int64_field() schema = cf.gen_collection_schema(fields=[field_one, field_two]) error = {ct.err_code: 0, ct.err_msg: "duplicated field name"} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) assert not self.utility_wrap.has_collection(c_name)[0] @pytest.mark.tags(CaseLabel.L0) @pytest.mark.parametrize( "field", [cf.gen_float_vec_field(), cf.gen_binary_vec_field()]) def test_collection_only_vector(self, field): """ target: test collection just with vec field method: create with float-vec fields expected: no exception """ self._connect() c_name = cf.gen_unique_str(prefix) schema = cf.gen_collection_schema(fields=[field]) self.collection_wrap.init_collection( c_name, schema=schema, check_task=CheckTasks.check_collection_property, check_items={ exp_name: c_name, exp_schema: schema }) @pytest.mark.tags(CaseLabel.L1) def test_collection_multi_float_vectors(self): """ target: test collection with multi float vectors method: create collection with two float-vec fields expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) fields = [cf.gen_float_vec_field(), cf.gen_float_vec_field(name="tmp")] schema = cf.gen_collection_schema(fields=fields) self.collection_wrap.init_collection( c_name, schema=schema, check_task=CheckTasks.check_collection_property, check_items={ exp_name: c_name, exp_schema: schema }) @pytest.mark.tags(CaseLabel.L1) def test_collection_mix_vectors(self): """ target: test collection with mix vectors method: create with float and binary vec expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) fields = [cf.gen_float_vec_field(), cf.gen_binary_vec_field()] schema = cf.gen_collection_schema(fields=fields) self.collection_wrap.init_collection( c_name, schema=schema, check_task=CheckTasks.check_collection_property, check_items={ exp_name: c_name, exp_schema: schema }) @pytest.mark.tags(CaseLabel.L0) def test_collection_without_vectors(self): """ target: test collection without vectors method: create collection only with int field expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) schema = cf.gen_collection_schema([cf.gen_int64_field()]) error = { ct.err_code: 0, ct.err_msg: "The schema must have vector column" } self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L0) def test_collection_primary_field(self): """ target: test collection with primary field method: specify primary field expected: collection.primary_field """ self._connect() c_name = cf.gen_unique_str(prefix) schema = cf.gen_default_collection_schema( primary_field=ct.default_int64_field_name) self.collection_wrap.init_collection(c_name, schema=schema) assert self.collection_wrap.primary_field.name == ct.default_int64_field_name @pytest.mark.tags(CaseLabel.L1) def test_collection_unsupported_primary_field( self, get_unsupported_primary_field): """ target: test collection with unsupported primary field type method: specify non-int64 as primary field expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) field = get_unsupported_primary_field vec_field = cf.gen_float_vec_field(name="vec") schema = cf.gen_collection_schema(fields=[field, vec_field], primary_field=field.name) error = { ct.err_code: 1, ct.err_msg: "the data type of primary key should be int64" } self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_collection_multi_primary_fields(self): """ target: test collection with multi primary method: collection with two primary fields expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) int_field = cf.gen_int64_field(is_primary=True) float_vec_field = cf.gen_float_vec_field(is_primary=True) schema = cf.gen_collection_schema(fields=[int_field, float_vec_field]) error = { ct.err_code: 0, ct.err_msg: "there are more than one primary key" } self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_collection_primary_inconsistent(self): """ target: test collection with different primary field setting method: 1. set A field is_primary 2. set primary_field is B expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) int_field = cf.gen_int64_field(name="int", is_primary=True) float_vec_field = cf.gen_float_vec_field(name="vec") schema = cf.gen_collection_schema(fields=[int_field, float_vec_field], primary_field="vec") error = { ct.err_code: 0, ct.err_msg: "there are more than one primary key" } self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_collection_field_primary_false(self): """ target: test collection with primary false method: define field with is_primary false expected: no exception """ self._connect() c_name = cf.gen_unique_str(prefix) int_field = cf.gen_int64_field(name="int") float_vec_field = cf.gen_float_vec_field() schema = cf.gen_collection_schema(fields=[int_field, float_vec_field]) self.collection_wrap.init_collection(c_name, schema=schema) assert self.collection_wrap.primary_field is None assert self.collection_wrap.schema.auto_id @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("is_primary", ct.get_invalid_strs) def test_collection_field_invalid_primary(self, is_primary): """ target: test collection with invalid primary method: define field with is_primary=non-bool expected: raise exception """ self._connect() name = cf.gen_unique_str(prefix) error = { ct.err_code: 0, ct.err_msg: "Param is_primary must be bool type" } self.field_schema_wrap.init_field_schema(name=name, dtype=DataType.INT64, is_primary=is_primary, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L0) @pytest.mark.parametrize("dtype", [DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR]) def test_collection_vector_without_dim(self, dtype): """ target: test collection without dimension method: define vector field without dim expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) float_vec_field, _ = self.field_schema_wrap.init_field_schema( name="vec", dtype=dtype) schema = cf.gen_collection_schema(fields=[float_vec_field]) error = { ct.err_code: 0, ct.err_msg: "dimension is not defined in field type params" } self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_collection_vector_invalid_dim(self, get_invalid_dim): """ target: test collection with invalid dimension method: define float-vec field with invalid dimension expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) float_vec_field = cf.gen_float_vec_field(dim=get_invalid_dim) schema = cf.gen_collection_schema(fields=[float_vec_field]) error = {ct.err_code: 0, ct.err_msg: "dim must be of int"} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("dim", [-1, 32769]) def test_collection_vector_out_bounds_dim(self, dim): """ target: test collection with out of bounds dim method: invalid dim -1 and 32759 expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) float_vec_field = cf.gen_float_vec_field(dim=dim) schema = cf.gen_collection_schema(fields=[float_vec_field]) error = { ct.err_code: 0, ct.err_msg: "invalid dimension: {}. should be in range 1 ~ 32768".format(dim) } self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_collection_non_vector_field_dim(self): """ target: test collection with dim for non-vector field method: define int64 field with dim expected: no exception """ self._connect() c_name = cf.gen_unique_str(prefix) int_field, _ = self.field_schema_wrap.init_field_schema( name="int", dtype=DataType.INT64, dim=ct.default_dim) float_vec_field = cf.gen_float_vec_field() schema = cf.gen_collection_schema(fields=[int_field, float_vec_field]) self.collection_wrap.init_collection( c_name, schema=schema, check_task=CheckTasks.check_collection_property, check_items={ exp_name: c_name, exp_schema: schema }) @pytest.mark.tags(CaseLabel.L1) def test_collection_desc(self): """ target: test collection with description method: create with description expected: assert default description """ self._connect() c_name = cf.gen_unique_str(prefix) schema = cf.gen_default_collection_schema( description=ct.collection_desc) self.collection_wrap.init_collection( c_name, schema=schema, check_task=CheckTasks.check_collection_property, check_items={ exp_name: c_name, exp_schema: schema }) @pytest.mark.tags(CaseLabel.L1) def test_collection_none_desc(self): """ target: test collection with none description method: create with none description expected: raise exception """ self._connect() c_name = cf.gen_unique_str(prefix) schema = cf.gen_default_collection_schema(description=None) error = {ct.err_code: 0, ct.err_msg: "expected one of: bytes, unicode"} self.collection_wrap.init_collection(c_name, schema=schema, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_collection_long_desc(self): """ target: test collection with long desc method: create with long desc expected: """ self._connect() c_name = cf.gen_unique_str(prefix) desc = "a".join("a" for _ in range(256)) schema = cf.gen_default_collection_schema(description=desc) self.collection_wrap.init_collection( c_name, schema=schema, check_task=CheckTasks.check_collection_property, check_items={ exp_name: c_name, exp_schema: schema }) @pytest.mark.tags(CaseLabel.L0) @pytest.mark.xfail(reason="issue #5667") def test_collection_binary(self): """ target: test collection with binary-vec method: create collection with binary field expected: assert binary field """ self._connect() c_name = cf.gen_unique_str(prefix) self.collection_wrap.init_collection( c_name, schema=default_binary_schema, check_task=CheckTasks.check_collection_property, check_items={ exp_name: c_name, exp_schema: default_binary_schema }) assert c_name, _ in self.utility_wrap.list_collections()
class TestQueryBase(TestcaseBase): """ test Query interface query(collection_name, expr, output_fields=None, partition_names=None, timeout=None) """ @pytest.mark.tags(CaseLabel.L0) def test_query(self): """ target: test query method: query with term expr expected: verify query result """ # create collection, insert default_nb, load collection collection_w, vectors = self.init_collection_general( prefix, insert_data=True)[0:2] int_values = vectors[0][ct.default_int64_field_name].values.tolist() pos = 5 term_expr = f'{ct.default_int64_field_name} in {int_values[:pos]}' res = vectors[0].iloc[0:pos, :1].to_dict('records') collection_w.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: res}) @pytest.mark.tags(CaseLabel.L1) def test_query_empty_collection(self): """ target: test query empty collection method: query on a empty collection expected: empty result """ c_name = cf.gen_unique_str(prefix) collection_w = self.init_collection_wrap(name=c_name) collection_w.load() res, _ = collection_w.query(default_term_expr) assert len(res) == 0 @pytest.mark.tags(CaseLabel.L0) def test_query_auto_id_collection(self): """ target: test query with auto_id=True collection method: test query with auto id expected: query result is correct """ self._connect() df = cf.gen_default_dataframe_data(ct.default_nb) df[ct.default_int64_field_name] = None insert_res, _, = self.collection_wrap.construct_from_dataframe( cf.gen_unique_str(prefix), df, primary_field=ct.default_int64_field_name, auto_id=True) assert self.collection_wrap.num_entities == ct.default_nb ids = insert_res[1].primary_keys pos = 5 res = df.iloc[:pos, :1].to_dict('records') self.collection_wrap.load() # query with all primary keys term_expr_1 = f'{ct.default_int64_field_name} in {ids[:pos]}' for i in range(5): res[i][ct.default_int64_field_name] = ids[i] self.collection_wrap.query(term_expr_1, check_task=CheckTasks.check_query_results, check_items={exp_res: res}) # query with part primary keys term_expr_2 = f'{ct.default_int64_field_name} in {[ids[0], 0]}' self.collection_wrap.query(term_expr_2, check_task=CheckTasks.check_query_results, check_items={exp_res: res[:1]}) @pytest.mark.tags(CaseLabel.L1) def test_query_auto_id_not_existed_primary_key(self): """ target: test query on auto_id true collection method: 1.create auto_id true collection 2.query with not existed primary keys expected: query result is empty """ schema = cf.gen_default_collection_schema(auto_id=True) collection_w = self.init_collection_wrap( name=cf.gen_unique_str(prefix), schema=schema) df = cf.gen_default_dataframe_data(ct.default_nb) df.drop(ct.default_int64_field_name, axis=1, inplace=True) mutation_res, _ = collection_w.insert(data=df) assert collection_w.num_entities == ct.default_nb collection_w.load() term_expr = f'{ct.default_int64_field_name} in [0, 1, 2]' res, _ = collection_w.query(term_expr) assert len(res) == 0 @pytest.mark.tags(CaseLabel.L1) def test_query_expr_none(self): """ target: test query with none expr method: query with expr None expected: raise exception """ collection_w, vectors = self.init_collection_general( prefix, insert_data=True)[0:2] error = {ct.err_code: 0, ct.err_msg: "The type of expr must be string"} collection_w.query(None, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_query_expr_non_string(self): """ target: test query with non-string expr method: query with non-string expr, eg 1, [] .. expected: raise exception """ collection_w, vectors = self.init_collection_general( prefix, insert_data=True)[0:2] exprs = [1, 2., [], {}, ()] error = {ct.err_code: 0, ct.err_msg: "The type of expr must be string"} for expr in exprs: collection_w.query(expr, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_query_expr_invalid_string(self): """ target: test query with invalid expr method: query with invalid string expr expected: raise exception """ collection_w, vectors = self.init_collection_general( prefix, insert_data=True)[0:2] error = {ct.err_code: 1, ct.err_msg: "Invalid expression!"} exprs = ["12-s", "中文", "a", " "] for expr in exprs: collection_w.query(expr, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.skip(reason="repeat with test_query, waiting for other expr") def test_query_expr_term(self): """ target: test query with TermExpr method: query with TermExpr expected: query result is correct """ collection_w, vectors = self.init_collection_general( prefix, insert_data=True)[0:2] res = vectors[0].iloc[:2, :1].to_dict('records') collection_w.query(default_term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: res}) @pytest.mark.tags(CaseLabel.L1) def test_query_expr_not_existed_field(self): """ target: test query with not existed field method: query by term expr with fake field expected: raise exception """ collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix)) term_expr = 'field in [1, 2]' error = {ct.err_code: 1, ct.err_msg: "fieldName(field) not found"} collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_query_expr_unsupported_field(self): """ target: test query on unsupported field method: query on float field expected: raise exception """ collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix)) term_expr = f'{ct.default_float_field_name} in [1., 2.]' error = {ct.err_code: 1, ct.err_msg: "column is not int64"} collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_query_expr_non_primary_field(self): """ target: test query on non-primary field method: query on non-primary int field expected: raise exception """ fields = [ cf.gen_int64_field(), cf.gen_int64_field(name='int2', is_primary=True), cf.gen_float_vec_field() ] schema = cf.gen_collection_schema(fields) collection_w = self.init_collection_wrap( name=cf.gen_unique_str(prefix), schema=schema) nb = 100 data = [[i for i in range(nb)], [i for i in range(nb)], cf.gen_vectors(nb, ct.default_dim)] collection_w.insert(data) assert collection_w.num_entities == nb assert collection_w.primary_field.name == 'int2' error = {ct.err_code: 1, ct.err_msg: "column is not primary key"} collection_w.query(default_term_expr, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_query_expr_wrong_term_keyword(self): """ target: test query with wrong term expr keyword method: query with wrong keyword term expr expected: raise exception """ collection_w, vectors = self.init_collection_general( prefix, insert_data=True)[0:2] expr_1 = f'{ct.default_int64_field_name} inn [1, 2]' error_1 = { ct.err_code: 1, ct.err_msg: f'unexpected token Identifier("inn")' } collection_w.query(expr_1, check_task=CheckTasks.err_res, check_items=error_1) expr_2 = f'{ct.default_int64_field_name} not in [1, 2]' error_2 = {ct.err_code: 1, ct.err_msg: 'not top level term'} collection_w.query(expr_2, check_task=CheckTasks.err_res, check_items=error_2) expr_3 = f'{ct.default_int64_field_name} in not [1, 2]' error_3 = { ct.err_code: 1, ct.err_msg: 'right operand of the InExpr must be array' } collection_w.query(expr_3, check_task=CheckTasks.err_res, check_items=error_3) @pytest.mark.tags(CaseLabel.L1) def test_query_expr_non_array_term(self): """ target: test query with non-array term expr method: query with non-array term expr expected: raise exception """ exprs = [ f'{ct.default_int64_field_name} in 1', f'{ct.default_int64_field_name} in "in"', f'{ct.default_int64_field_name} in (mn)' ] collection_w, vectors = self.init_collection_general( prefix, insert_data=True)[0:2] error = { ct.err_code: 1, ct.err_msg: "right operand of the InExpr must be array" } for expr in exprs: collection_w.query(expr, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_query_expr_empty_term_array(self): """ target: test query with empty array term expr method: query with empty term expr expected: empty result """ term_expr = f'{ct.default_int64_field_name} in []' collection_w, vectors = self.init_collection_general( prefix, insert_data=True)[0:2] res, _ = collection_w.query(term_expr) assert len(res) == 0 @pytest.mark.tags(CaseLabel.L1) def test_query_expr_inconsistent_mix_term_array(self): """ target: test query with term expr that field and array are inconsistent or mix type method: 1.query with int field and float values 2.query with term expr that has int and float type value expected: raise exception """ collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix)) int_values = [[1., 2.], [1, 2.]] error = {ct.err_code: 1, ct.err_msg: "type mismatch"} for values in int_values: term_expr = f'{ct.default_int64_field_name} in {values}' collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_query_expr_non_constant_array_term(self): """ target: test query with non-constant array term expr method: query with non-constant array expr expected: raise exception """ collection_w, vectors = self.init_collection_general( prefix, insert_data=True)[0:2] constants = [[1], (), {}] error = {ct.err_code: 1, ct.err_msg: "unsupported leaf node"} for constant in constants: term_expr = f'{ct.default_int64_field_name} in [{constant}]' collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_query_output_field_none_or_empty(self): """ target: test query with none and empty output field method: query with output field=None, field=[] expected: return primary field """ collection_w, vectors = self.init_collection_general( prefix, insert_data=True)[0:2] for fields in [None, []]: res, _ = collection_w.query(default_term_expr, output_fields=fields) assert list(res[0].keys()) == [ct.default_int64_field_name] @pytest.mark.tags(CaseLabel.L0) def test_query_output_one_field(self): """ target: test query with output one field method: query with output one field expected: return one field """ collection_w, vectors = self.init_collection_general( prefix, insert_data=True)[0:2] res, _ = collection_w.query( default_term_expr, output_fields=[ct.default_float_field_name]) assert set(res[0].keys()) == set( [ct.default_int64_field_name, ct.default_float_field_name]) @pytest.mark.tags(CaseLabel.L1) def test_query_output_all_fields(self): """ target: test query with none output field method: query with output field=None expected: return all fields """ collection_w = self.init_collection_wrap( name=cf.gen_unique_str(prefix)) df = cf.gen_default_dataframe_data() collection_w.insert(df) assert collection_w.num_entities == ct.default_nb all_fields = [ ct.default_int64_field_name, ct.default_float_field_name, ct.default_float_vec_field_name ] res = df.iloc[:2].to_dict('records') collection_w.load() actual_res, _ = collection_w.query( default_term_expr, output_fields=all_fields, check_task=CheckTasks.check_query_results, check_items={ exp_res: res, "with_vec": True }) assert set(actual_res[0].keys()) == set(all_fields) @pytest.mark.tags(CaseLabel.L1) def test_query_output_float_vec_field(self): """ target: test query with vec output field method: specify vec field as output field expected: return primary field and vec field """ collection_w = self.init_collection_wrap( name=cf.gen_unique_str(prefix)) df = cf.gen_default_dataframe_data() collection_w.insert(df) assert collection_w.num_entities == ct.default_nb fields = [[ct.default_float_vec_field_name], [ ct.default_int64_field_name, ct.default_float_vec_field_name ]] res = df.loc[:1, [ ct.default_int64_field_name, ct.default_float_vec_field_name ]].to_dict('records') collection_w.load() for output_fields in fields: collection_w.query(default_term_expr, output_fields=output_fields, check_task=CheckTasks.check_query_results, check_items={ exp_res: res, "with_vec": True }) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("vec_fields", [[cf.gen_float_vec_field(name="float_vector1")]]) def test_query_output_multi_float_vec_field(self, vec_fields): """ target: test query and output multi float vec fields method: a.specify multi vec field as output b.specify output_fields with wildcard % expected: verify query result """ # init collection with two float vector fields schema = cf.gen_schema_multi_vector_fields(vec_fields) collection_w = self.init_collection_wrap( name=cf.gen_unique_str(prefix), schema=schema) df = cf.gen_dataframe_multi_vec_fields(vec_fields=vec_fields) collection_w.insert(df) assert collection_w.num_entities == ct.default_nb # query with two vec output_fields output_fields = [ ct.default_int64_field_name, ct.default_float_vec_field_name ] for vec_field in vec_fields: output_fields.append(vec_field.name) res = df.loc[:1, output_fields].to_dict('records') collection_w.load() collection_w.query(default_term_expr, output_fields=output_fields, check_task=CheckTasks.check_query_results, check_items={ exp_res: res, "with_vec": True }) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize( "vec_fields", [[cf.gen_binary_vec_field()], [cf.gen_binary_vec_field(), cf.gen_binary_vec_field("binary_vec1")]]) def test_query_output_mix_float_binary_field(self, vec_fields): """ target: test query and output mix float and binary vec fields method: a.specify mix vec field as output b.specify output_fields with wildcard % expected: output binary vector and float vec """ # init collection with two float vector fields schema = cf.gen_schema_multi_vector_fields(vec_fields) collection_w = self.init_collection_wrap( name=cf.gen_unique_str(prefix), schema=schema) df = cf.gen_dataframe_multi_vec_fields(vec_fields=vec_fields) collection_w.insert(df) assert collection_w.num_entities == ct.default_nb # query with two vec output_fields output_fields = [ ct.default_int64_field_name, ct.default_float_vec_field_name ] for vec_field in vec_fields: output_fields.append(vec_field.name) res = df.loc[:1, output_fields].to_dict('records') collection_w.load() collection_w.query(default_term_expr, output_fields=output_fields, check_task=CheckTasks.check_query_results, check_items={ exp_res: res, "with_vec": True }) # query with wildcard % collection_w.query(default_term_expr, output_fields=["%"], check_task=CheckTasks.check_query_results, check_items={ exp_res: res, "with_vec": True }) @pytest.mark.tags(CaseLabel.L1) def test_query_output_binary_vec_field(self): """ target: test query with binary vec output field method: specify binary vec field as output field expected: return primary field and binary vec field """ collection_w, vectors = self.init_collection_general( prefix, insert_data=True, is_binary=True)[0:2] fields = [[ct.default_binary_vec_field_name], [ ct.default_int64_field_name, ct.default_binary_vec_field_name ]] for output_fields in fields: res, _ = collection_w.query(default_term_expr, output_fields=output_fields) assert list(res[0].keys()) == fields[-1] @pytest.mark.tags(CaseLabel.L1) def test_query_output_primary_field(self): """ target: test query with output field only primary field method: specify int64 primary field as output field expected: return int64 field """ collection_w, vectors = self.init_collection_general( prefix, insert_data=True)[0:2] res, _ = collection_w.query( default_term_expr, output_fields=[ct.default_int64_field_name]) assert list(res[0].keys()) == [ct.default_int64_field_name] @pytest.mark.tags(CaseLabel.L1) def test_query_output_not_existed_field(self): """ target: test query output not existed field method: query with not existed output field expected: raise exception """ collection_w, vectors = self.init_collection_general( prefix, insert_data=True)[0:2] error = {ct.err_code: 1, ct.err_msg: 'Field int not exist'} output_fields = [["int"], [ct.default_int64_field_name, "int"]] for fields in output_fields: collection_w.query(default_term_expr, output_fields=fields, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.xfail(reason="exception not MilvusException") def test_query_invalid_output_fields(self): """ target: test query with invalid output fields method: query with invalid field fields expected: raise exception """ collection_w, vectors = self.init_collection_general( prefix, insert_data=True)[0:2] output_fields = ["12-s", 1, [1, "2", 3], (1, ), {1: 1}] error = { ct.err_code: 0, ct.err_msg: f'Invalid query format. \'output_fields\' must be a list' } for fields in output_fields: collection_w.query(default_term_expr, output_fields=fields, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L0) def test_query_output_fields_simple_wildcard(self): """ target: test query output_fields with simple wildcard (* and %) method: specify output_fields as "*" and "*", "%" expected: output all scale field; output all fields """ # init collection with fields: int64, float, float_vec, float_vector1 collection_w, df = self.init_multi_fields_collection_wrap( cf.gen_unique_str(prefix)) collection_w.load() # query with wildcard scale(*) output_fields = [ ct.default_int64_field_name, ct.default_float_field_name ] res = df.loc[:1, output_fields].to_dict('records') collection_w.query(default_term_expr, output_fields=["*"], check_task=CheckTasks.check_query_results, check_items={exp_res: res}) # query with wildcard % output_fields2 = [ ct.default_int64_field_name, ct.default_float_vec_field_name, ct.another_float_vec_field_name ] res2 = df.loc[:1, output_fields2].to_dict('records') collection_w.query(default_term_expr, output_fields=["%"], check_task=CheckTasks.check_query_results, check_items={ exp_res: res2, "with_vec": True }) # query with wildcard all fields: vector(%) and scale(*) res3 = df.iloc[:2].to_dict('records') collection_w.query(default_term_expr, output_fields=["*", "%"], check_task=CheckTasks.check_query_results, check_items={ exp_res: res3, "with_vec": True }) @pytest.mark.tags(CaseLabel.L1) def test_query_output_fields_part_scale_wildcard(self): """ target: test query output_fields with part wildcard method: specify output_fields as wildcard and part field expected: verify query result """ # init collection with fields: int64, float, float_vec, float_vector1 collection_w, df = self.init_multi_fields_collection_wrap( cf.gen_unique_str(prefix)) # query with output_fields=["*", float_vector) res = df.iloc[:2, :3].to_dict('records') collection_w.load() collection_w.query( default_term_expr, output_fields=["*", ct.default_float_vec_field_name], check_task=CheckTasks.check_query_results, check_items={ exp_res: res, "with_vec": True }) # query with output_fields=["*", float) res2 = df.iloc[:2, :2].to_dict('records') collection_w.load() collection_w.query(default_term_expr, output_fields=["*", ct.default_float_field_name], check_task=CheckTasks.check_query_results, check_items={exp_res: res2}) @pytest.mark.tags(CaseLabel.L1) def test_query_output_fields_part_vector_wildcard(self): """ target: test query output_fields with part wildcard method: specify output_fields as wildcard and part field expected: verify query result """ # init collection with fields: int64, float, float_vec, float_vector1 collection_w, df = self.init_multi_fields_collection_wrap( cf.gen_unique_str(prefix)) collection_w.load() # query with output_fields=["%", float), expected: all fields res = df.iloc[:2].to_dict('records') collection_w.query(default_term_expr, output_fields=["%", ct.default_float_field_name], check_task=CheckTasks.check_query_results, check_items={ exp_res: res, "with_vec": True }) # query with output_fields=["%", float_vector), expected: int64, float_vector, float_vector1 output_fields = [ ct.default_int64_field_name, ct.default_float_vec_field_name, ct.another_float_vec_field_name ] res2 = df.loc[:1, output_fields].to_dict('records') collection_w.query( default_term_expr, output_fields=["%", ct.default_float_vec_field_name], check_task=CheckTasks.check_query_results, check_items={ exp_res: res2, "with_vec": True }) @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("output_fields", [["*%"], ["**"], ["*", "@"]]) def test_query_invalid_wildcard(self, output_fields): """ target: test query with invalid output wildcard method: output_fields is invalid output wildcard expected: raise exception """ # init collection with fields: int64, float, float_vec, float_vector1 collection_w, df = self.init_multi_fields_collection_wrap( cf.gen_unique_str(prefix)) collection_w.load() # query with invalid output_fields error = { ct.err_code: 1, ct.err_msg: f"Field {output_fields[-1]} not exist" } collection_w.query(default_term_expr, output_fields=output_fields, check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L0) def test_query_partition(self): """ target: test query on partition method: create a partition and query expected: verify query result """ collection_w = self.init_collection_wrap( name=cf.gen_unique_str(prefix)) partition_w = self.init_partition_wrap(collection_wrap=collection_w) df = cf.gen_default_dataframe_data(ct.default_nb) partition_w.insert(df) assert collection_w.num_entities == ct.default_nb partition_w.load() res = df.iloc[:2, :1].to_dict('records') collection_w.query(default_term_expr, partition_names=[partition_w.name], check_task=CheckTasks.check_query_results, check_items={exp_res: res}) @pytest.mark.tags(CaseLabel.L1) def test_query_partition_without_loading(self): """ target: test query on partition without loading method: query on partition and no loading expected: raise exception """ collection_w = self.init_collection_wrap( name=cf.gen_unique_str(prefix)) partition_w = self.init_partition_wrap(collection_wrap=collection_w) df = cf.gen_default_dataframe_data(ct.default_nb) partition_w.insert(df) assert partition_w.num_entities == ct.default_nb error = { ct.err_code: 1, ct.err_msg: f'collection {collection_w.name} was not loaded into memory' } collection_w.query(default_term_expr, partition_names=[partition_w.name], check_task=CheckTasks.err_res, check_items=error) @pytest.mark.tags(CaseLabel.L1) def test_query_default_partition(self): """ target: test query on default partition method: query on default partition expected: verify query result """ collection_w, vectors = self.init_collection_general( prefix, insert_data=True)[0:2] res = vectors[0].iloc[:2, :1].to_dict('records') collection_w.query(default_term_expr, partition_names=[ct.default_partition_name], check_task=CheckTasks.check_query_results, check_items={exp_res: res}) @pytest.mark.tags(CaseLabel.L1) def test_query_empty_partition(self): """ target: test query on empty partition method: query on a empty collection expected: empty query result """ collection_w = self.init_collection_wrap( name=cf.gen_unique_str(prefix)) partition_w = self.init_partition_wrap(collection_wrap=collection_w) assert partition_w.is_empty partition_w.load() res, _ = collection_w.query(default_term_expr, partition_names=[partition_w.name]) assert len(res) == 0 @pytest.mark.tags(CaseLabel.L1) def test_query_not_existed_partition(self): """ target: test query on a not existed partition method: query on not existed partition expected: raise exception """ collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix)) collection_w.load() partition_names = cf.gen_unique_str() error = { ct.err_code: 1, ct.err_msg: f'PartitonName: {partition_names} not found' } collection_w.query(default_term_expr, partition_names=[partition_names], check_task=CheckTasks.err_res, check_items=error)