Esempio n. 1
0
 def test_insert_binary_dim_not_match(self):
     """
     target: check that binary data with an unexpected dim is rejected on insert
     method: 1. create a collection from the default binary schema
             2. insert a binary dataframe generated with dim=120
     expected: insert is checked as an error result (err_code 1) whose message
               reports ct.default_dim as the collection dim and 120 as the entity dim
     """
     collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix),
                                              schema=default_binary_schema)

     # Dataframe whose binary vectors use dim=120 rather than the schema's dim.
     dim = 120
     bad_dim_df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb, dim=dim)

     expected_error = {
         ct.err_code: 1,
         ct.err_msg: f'Collection field dim is {ct.default_dim}, but entities field dim is {dim}',
     }
     collection_w.insert(data=bad_dim_df,
                         check_task=CheckTasks.err_res,
                         check_items=expected_error)
Esempio n. 2
0
 def test_insert_binary_dataframe(self):
     """
     target: check inserting a binary dataframe into a binary collection
     method: 1. create a collection from the default binary schema
             2. insert a dataframe of ct.default_nb binary rows
     expected: insert_count and num_entities both equal ct.default_nb, and the
               returned primary keys equal the dataframe's int64 field values
     """
     collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix),
                                              schema=default_binary_schema)
     binary_df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)

     insert_result, _ = collection_w.insert(data=binary_df)

     # The mutation result must account for every inserted row ...
     assert insert_result.insert_count == ct.default_nb
     # ... and return the primary keys in the same order as the dataframe column.
     expected_pks = binary_df[ct.default_int64_field_name].values.tolist()
     assert insert_result.primary_keys == expected_pks
     assert collection_w.num_entities == ct.default_nb
Esempio n. 3
0
 def test_collection_binary_created_by_dataframe(self):
     """
     target: check creating a binary collection directly from a dataframe
     method: call init_collection with only a name and a binary dataframe
     expected: the created collection has the requested name and a schema
               matching the default binary collection schema
     """
     connection = self._connect()
     collection_name = cf.gen_unique_str(prefix)
     binary_df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)

     # Properties the collection-property check compares the new collection against.
     expected_props = {
         exp_name: collection_name,
         exp_schema: cf.gen_default_binary_collection_schema(),
     }
     self.collection_wrap.init_collection(name=collection_name,
                                          data=binary_df,
                                          check_task=CheckTasks.check_collection_property,
                                          check_items=expected_props)
     connection.flush([collection_name])
Esempio n. 4
0
 def test_insert_binary_dataframe(self):
     """
     target: check inserting a binary dataframe into a binary collection
     method: 1. create a collection from the default binary schema
             2. insert a dataframe of ct.default_nb binary rows
             3. flush the collection through the connection
     expected: num_entities equals ct.default_nb
     """
     collection_name = cf.gen_unique_str(prefix)
     collection_w = self.init_collection_wrap(name=collection_name,
                                              schema=default_binary_schema)

     binary_df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
     collection_w.insert(data=binary_df)

     # Flush explicitly before reading the entity count.
     connection, _ = self.connection_wrap.get_connection()
     connection.flush([collection_name])

     assert collection_w.num_entities == ct.default_nb
Esempio n. 5
0
    def test_compact_after_binary_index(self):
        """
        target: test compact after create index
        method: 1.insert binary data into two segments
                2.create binary index
                3.load and search
                4.compact and wait until the compacted segment is reported
                  by the query segment info
                5.search again
        expected: the searches before and after compaction each return
                  ct.default_nq results with ct.default_limit hits
        """
        # create collection with 1 shard and insert 2 segments
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1,
                                                 schema=cf.gen_default_binary_collection_schema())
        for i in range(2):
            df, _ = cf.gen_default_binary_dataframe_data()
            collection_w.insert(data=df)
            assert collection_w.num_entities == (i + 1) * ct.default_nb

        # create index
        collection_w.create_index(ct.default_binary_vec_field_name, ct.default_binary_index)
        log.debug(collection_w.index())

        # load and search; the query vectors come from the last inserted dataframe
        collection_w.load()
        search_params = {"metric_type": "JACCARD", "params": {"nprobe": 32}}
        search_vectors = df[ct.default_binary_vec_field_name][:ct.default_nq].to_list()
        search_res_one, _ = collection_w.search(search_vectors, ct.default_binary_vec_field_name,
                                                search_params, ct.default_limit)

        # compact
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        c_plans = collection_w.get_compaction_plans(check_task=CheckTasks.check_merge_compact)[0]

        # waiting for handoff completed and search: poll every 5s until the first
        # query segment is the compaction target, failing once `cost` seconds elapsed
        cost = 30
        start = time()
        while True:
            sleep(5)
            segment_info = self.utility_wrap.get_query_segment_info(collection_w.name)[0]
            if len(segment_info) != 0 and segment_info[0].segmentID == c_plans.plans[0].target:
                log.debug(segment_info)
                break
            if time() - start > cost:
                raise MilvusException(1, f"Handoff after compact and index cost more than {cost}s")

        # verify search result: the post-compaction result must be checked too,
        # not only the one obtained before compaction
        search_res_two, _ = collection_w.search(search_vectors, ct.default_binary_vec_field_name,
                                                search_params, ct.default_limit)
        for search_res in (search_res_one, search_res_two):
            assert len(search_res) == ct.default_nq
            for hits in search_res:
                assert len(hits) == ct.default_limit
Esempio n. 6
0
 def test_collection_binary_with_dataframe(self):
     """
     target: check creating a binary collection from a schema plus a dataframe
     method: call init_collection with the default binary schema and a
             dataframe of ct.default_nb binary rows, then flush
     expected: collection name and schema match, and num_entities equals
               ct.default_nb
     """
     connection = self._connect()
     collection_name = cf.gen_unique_str(prefix)
     binary_df, _ = cf.gen_default_binary_dataframe_data(nb=ct.default_nb)

     # Properties the collection-property check compares the new collection against.
     expected_props = {exp_name: collection_name, exp_schema: default_binary_schema}
     self.collection_wrap.init_collection(collection_name,
                                          schema=default_binary_schema,
                                          data=binary_df,
                                          check_task=CheckTasks.check_collection_property,
                                          check_items=expected_props)

     connection.flush([collection_name])
     assert self.collection_wrap.num_entities == ct.default_nb
Esempio n. 7
0
 def test_insert_binary_after_index(self):
     """
     target: check inserting binary data into a collection that already has an index
     method: 1. create an index on the binary vector field of an empty collection
             2. insert a dataframe of ct.default_nb binary rows
     expected: 1. has_index is truthy and the reported index equals an Index
                  built from the same field and params
               2. num_entities equals ct.default_nb
     """
     vec_field = ct.default_binary_vec_field_name
     collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix),
                                              schema=cf.gen_default_binary_collection_schema())

     # Build the index before any data exists.
     collection_w.create_index(vec_field, default_binary_index_params)
     index_exists = collection_w.has_index()[0]
     assert index_exists

     # The index reported by the wrapper must match one built from the same inputs.
     index, _ = collection_w.index()
     expected_index = Index(collection_w.collection, vec_field, default_binary_index_params)
     assert index == expected_index
     assert collection_w.indexes[0] == index

     # Insert after indexing and confirm the row count.
     binary_df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
     collection_w.insert(data=binary_df)
     assert collection_w.num_entities == ct.default_nb
Esempio n. 8
0
    def test_compact_after_binary_index(self):
        """
        target: check searching a binary collection before and after compaction
        method: 1. insert binary data twice into a 1-shard collection
                2. create the default binary index and load
                3. search with JACCARD and check the result shape
                4. compact and check the compaction plans as a merge compact
                5. search with the "L1" metric, expecting an error result
                6. search with JACCARD again
        expected: the JACCARD hits after compaction have the same ids, in the
                  same positions, as the hits before compaction
        """
        def build_params(metric):
            # Fresh search-params dict for each search call.
            return {"metric_type": metric, "params": {"nprobe": 10}}

        vec_field = ct.default_binary_vec_field_name
        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix), shards_num=1,
                                                 schema=cf.gen_default_binary_collection_schema())

        # Two insert rounds; the entity count is checked after each one.
        for batch_no in range(2):
            batch_df, _ = cf.gen_default_binary_dataframe_data()
            collection_w.insert(data=batch_df)
            assert collection_w.num_entities == (batch_no + 1) * ct.default_nb

        # create index
        collection_w.create_index(vec_field, ct.default_binary_index)
        log.debug(collection_w.index())

        collection_w.load()

        # Baseline search before compaction.
        vectors = cf.gen_binary_vectors(ct.default_nq, ct.default_dim)[1]
        search_res_one, _ = collection_w.search(vectors, vec_field, build_params("JACCARD"),
                                                ct.default_limit)
        assert len(search_res_one) == ct.default_nq
        for hits in search_res_one:
            assert len(hits) == ct.default_limit

        # compact
        collection_w.compact()
        collection_w.wait_for_compaction_completed()
        collection_w.get_compaction_plans(check_task=CheckTasks.check_merge_compact)

        # verify index re-build and re-load: searching with the "L1" metric is
        # checked as an error result
        metric_error = {ct.err_code: 1, ct.err_msg: "metric type not found: (L1)"}
        collection_w.search(vectors, vec_field, build_params("L1"), ct.default_limit,
                            check_task=CheckTasks.err_res, check_items=metric_error)

        # verify search result: ids must match the pre-compaction hits position by position
        search_res_two, _ = collection_w.search(vectors, vec_field, build_params("JACCARD"),
                                                ct.default_limit)
        for i in range(ct.default_nq):
            hits_after = search_res_two[i]
            hits_before = search_res_one[i]
            for j in range(ct.default_limit):
                assert hits_after[j].id == hits_before[j].id