コード例 #1
0
ファイル: utils.py プロジェクト: haojunyu/milvus
def create_collections_and_insert_data():
    """Create one collection per entry of ``all_index_types`` and fill it.

    Every collection shares the same three-field schema (``count`` primary
    key, ``random_value`` double, 128-dim ``float_vector``) and receives 3000
    rows. The time needed to read ``num_entities`` is printed for each
    collection, and the collection list is printed before and after the run.
    """
    import random
    import time

    vector_dim = 128
    row_count = 3000
    schema = CollectionSchema(
        fields=[
            FieldSchema(name="count", dtype=DataType.INT64, is_primary=True),
            FieldSchema(name="random_value", dtype=DataType.DOUBLE),
            FieldSchema(name="float_vector",
                        dtype=DataType.FLOAT_VECTOR,
                        dim=vector_dim),
        ],
        description="test collection",
    )
    print("\nList collections...")
    print(list_collections())
    for col_name in all_index_types:
        print("\nCreate collection...")
        target = Collection(name=col_name, schema=schema)
        # Row i carries the same value i / row_count in every dimension.
        embeddings = [[row / row_count] * vector_dim
                      for row in range(row_count)]
        primary_keys = list(range(row_count))
        random_values = [
            float(random.randrange(-20, -10)) for _ in range(row_count)
        ]
        target.insert([primary_keys, random_values, embeddings])
        print(f"collection name: {col_name}")
        print("Get collection entities")
        # Only the num_entities lookup (and its print) is timed.
        started = time.time()
        print(f"collection entities: {target.num_entities}")
        elapsed = time.time() - started
        print(f"Get collection entities time = {elapsed:.4f}s")
    print("\nList collections...")
    print(list_collections())
コード例 #2
0
ファイル: test_auto_load_balance.py プロジェクト: avmi/milvus
    def test_auto_load_balance(self):
        """Verify the health checkers pass before and after scaling Milvus up.

        Flow: install a release, connect to it, start the checker monitor
        threads, wait, log the sealed-segment distribution and assert the
        checker statistics; then scale up, reset the counters, wait again and
        repeat the logging and the assertion.
        """

        def log_sealed_segments(label):
            # Log the 'sealed' entry of every node's segment distribution,
            # for each collection currently listed.
            for name in list_collections():
                distribution = cf.get_segment_distribution(
                    utility.get_query_segment_info(name))
                for node, segments in distribution.items():
                    log.info(
                        f"collection {name}'s {label} in node {node} is {segments['sealed']}"
                    )

        log.info("start to install milvus")
        self.release_name, host, port = install_milvus(
            "test-auto-load-balance")  # todo add release name
        assert host is not None
        conn = connections.connect("default", host=host, port=port)
        assert conn is not None

        checkers = {
            Op.create: CreateChecker(),
            Op.insert: InsertFlushChecker(),
            Op.flush: InsertFlushChecker(flush=True),
            Op.index: IndexChecker(),
            Op.search: SearchChecker(),
            Op.query: QueryChecker()
        }
        self.health_checkers = checkers
        cc.start_monitor_threads(checkers)

        # Let the checkers run for a while before the first inspection.
        sleep(constants.WAIT_PER_OP * 10)
        log_sealed_segments("segment distribution")
        log.info("first assert")
        assert_statistic(self.health_checkers)

        # Scale up, then start counting from zero again.
        log.info("scale up milvus")
        scale_up_milvus(self.release_name)
        cc.reset_counting(self.health_checkers)
        sleep(constants.WAIT_PER_OP * 10)
        log_sealed_segments("sealed segment distribution")
        log.info("second assert")
        assert_statistic(self.health_checkers)

        # TODO assert segment distribution

        # assert all expectations
        assert_expectations()
コード例 #3
0
def filter_collections_by_prefix(prefix):
    """Return the names from ``list_collections()`` that start with ``prefix``.

    The order of ``list_collections()`` is preserved in the returned list.
    """
    return [name for name in list_collections() if name.startswith(prefix)]
コード例 #4
0
ファイル: partition.py プロジェクト: filip-halt/pymilvus
def test_partition():
    """Walk a partition through create, insert, load, search, release, drop.

    A uniquely named collection and partition are created on the "default"
    connection; both are dropped at the end.
    """
    connections.connect(alias="default")
    print("create collection")
    parent = Collection(name=gen_unique_str(), schema=gen_default_fields())
    print("create partition")
    part = Partition(parent, name=gen_unique_str())
    print(list_collections())
    assert has_partition(parent.name, part.name) is True

    rows = gen_data(default_nb)
    print("insert data to partition")
    part.insert(rows)
    assert part.is_empty is False
    assert part.num_entities == default_nb

    print("load partition")
    part.load()
    print("search partition")
    # Query with the last two entries of the third generated column.
    query_vectors = rows[2][-2:]
    params = {"metric_type": "L2", "params": {"nprobe": 10}}
    limit = 5
    results = part.search(query_vectors, "float_vector", params, limit,
                          "count > 100")
    for query_hits in results:
        for single_hit in query_hits:
            print(single_hit)

    print("release partition")
    part.release()
    print("drop partition")
    part.drop()
    print("drop collection")
    parent.drop()
コード例 #5
0
ファイル: utils.py プロジェクト: haojunyu/milvus
def get_collections():
    """Print the number of collections and the entity count of each one."""
    print("\nList collections...")
    names = list_collections()
    print(f"collections_nums: {len(names)}")
    # One "<name>: <num_entities>" line per collection.
    for col_name in names:
        print(f"{col_name}: {Collection(name=col_name).num_entities}")
コード例 #6
0
ファイル: utils.py プロジェクト: haojunyu/milvus
def create_index():
    """Create an index on ``float_vector`` for every listed collection.

    Each collection's name is used both as the index type and as the key
    into ``index_params_map``. The metric is "HAMMING" for the two binary
    index types and "L2" otherwise.
    """
    binary_index_types = ("BIN_FLAT", "BIN_IVF_FLAT")
    names = list_collections()
    print("\nCreate index...")
    for name in names:
        target = Collection(name=name)

        print(name)
        print(target)
        index_spec = {
            "index_type": name,
            "params": index_params_map[name],
            "metric_type": "HAMMING" if name in binary_index_types else "L2",
        }
        target.create_index(field_name="float_vector", index_params=index_spec)
コード例 #7
0
ファイル: utils.py プロジェクト: symphony233/milvus
def load_and_search():
    """Load each listed collection, run one timed search on it, and release it.

    The collection name is passed to ``gen_search_param`` as the index type,
    and the first parameter set it returns is used. A single all-zero 128-dim
    query vector is searched with the filter ``count > 500``. Hits, their
    ``random_value`` field, the hit ids and the search latency are printed.

    The collection is released even when the search raises, so a failure
    does not leave it loaded; the exception still propagates to the caller.
    """
    import time

    top_k = 5
    # Only one query vector is ever searched, so build just that one
    # (once) instead of a 3000-row list per collection.
    query_vectors = [[0.0 for _ in range(128)]]

    print("search data starts")
    for name in list_collections():
        c = Collection(name=name)
        print(f"collection name: {name}")
        c.load()
        try:
            search_params = gen_search_param(name)[0]
            print(search_params)

            start_time = time.time()
            print("\nSearch...")
            # define output_fields of search result
            res = c.search(query_vectors,
                           "float_vector",
                           search_params,
                           top_k,
                           "count > 500",
                           output_fields=["count", "random_value"],
                           timeout=20)
            end_time = time.time()
            # show result
            for hits in res:
                for hit in hits:
                    # Print the hit together with its random_value field.
                    print(hit, hit.entity.get("random_value"))
                print(hits.ids)

                print("###########")
            print(f"search latency = {end_time - start_time:.4f}s")
        finally:
            # Release on both the success and the failure path.
            c.release()
    print("search data ends")
コード例 #8
0
ファイル: get_all_collections.py プロジェクト: avmi/milvus
def save_all_checker_collections(host="127.0.0.1", prefix="Checker"):
    """Write a capped selection of collection names to a JSON file.

    Connects to ``host`` on port 19530 and lists the collections. When
    ``prefix`` is not None, only names containing it are kept. The names are
    then grouped by the text before their first underscore. The size check
    (``<= 10``) runs before appending, so a group holds at most eleven names.
    The selection is printed and written as ``{"all": [...]}`` to
    ``/tmp/ci_logs/all_collections.json``.
    """
    # create connection
    connections.connect(host=host, port="19530")
    names = list_collections()
    if prefix is not None:
        names = [name for name in names if prefix in name]

    grouped = defaultdict(list)
    for name in names:
        bucket = grouped[name.split("_")[0]]
        if len(bucket) <= 10:
            bucket.append(name)

    selected_collections = [
        name for bucket in grouped.values() for name in bucket
    ]
    print("selected_collections is")
    print(selected_collections)
    with open("/tmp/ci_logs/all_collections.json", "w") as f:
        json.dump({"all": selected_collections}, f)
コード例 #9
0
ファイル: hello_milvus.py プロジェクト: avmi/milvus
def hello_milvus(host="127.0.0.1"):
    """Run an end-to-end smoke test against the Milvus server at ``host``.

    Steps, each printed and most of them timed: connect on port 19530,
    create the ``hello_milvus`` collection, insert 3000 rows, read the
    entity count, build an IVF_SQ8 index, load with the detected replica
    number (1 if it cannot be read), search with the last two inserted
    vectors, and query four primary keys.

    Uses the module-level names ``random``, ``np`` and ``TIMEOUT``.
    """
    import time

    # create connection
    connections.connect(host=host, port="19530")

    print("\nList collections...")
    print(list_collections())

    # create collection
    vector_dim = 128
    schema = CollectionSchema(
        fields=[
            FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True),
            FieldSchema(name="float", dtype=DataType.FLOAT),
            FieldSchema(name="varchar",
                        dtype=DataType.VARCHAR,
                        max_length=65535),
            FieldSchema(name="float_vector",
                        dtype=DataType.FLOAT_VECTOR,
                        dim=vector_dim),
        ],
        description="test collection",
    )

    print("\nCreate collection...")
    collection = Collection(name="hello_milvus", schema=schema)

    print("\nList collections...")
    print(list_collections())

    # insert data; the scalar columns are built inside the timed region
    row_count = 3000
    embeddings = [[random.random() for _ in range(vector_dim)]
                  for _ in range(row_count)]
    started = time.time()
    collection.insert([
        list(range(row_count)),
        [np.float32(i) for i in range(row_count)],
        [str(i) for i in range(row_count)],
        embeddings,
    ])
    elapsed = time.time() - started
    print(f"\nInsert {row_count} vectors cost {elapsed:.4f} seconds")

    started = time.time()
    print("\nGet collection entities...")
    print(collection.num_entities)
    elapsed = time.time() - started
    print(f"\nGet collection entities cost {elapsed:.4f} seconds")

    # create index and load table
    index_spec = {
        "index_type": "IVF_SQ8",
        "metric_type": "L2",
        "params": {"nlist": 64},
    }
    print("\nCreate index...")
    started = time.time()
    collection.create_index(field_name="float_vector", index_params=index_spec)
    elapsed = time.time() - started
    print(f"\nCreate index cost {elapsed:.4f} seconds")

    print("\nGet replicas number")
    try:
        replica_number = len(collection.get_replicas().groups)
        print(f"\nReplicas number is {replica_number}")
    except Exception as exc:
        # Best effort: fall back to a single replica when the lookup fails.
        print(str(exc))
        replica_number = 1

    print("\nload collection...")
    started = time.time()
    collection.load(replica_number=replica_number)
    elapsed = time.time() - started
    print(f"\nload collection cost {elapsed:.4f} seconds")

    # search with the last two inserted vectors
    limit = 5
    params = {"metric_type": "L2", "params": {"nprobe": 10}}
    started = time.time()
    print("\nSearch...")
    results = collection.search(embeddings[-2:],
                                "float_vector",
                                params,
                                limit,
                                "int64 > 100",
                                output_fields=["int64", "float"],
                                timeout=TIMEOUT)
    elapsed = time.time() - started
    print(f"search cost  {elapsed:.4f} seconds")
    # show each hit together with its "float" field
    for query_hits in results:
        for single_hit in query_hits:
            print(single_hit, single_hit.entity.get("float"))

    # query four primary keys and print the rows ordered by key
    rows = collection.query("int64 in [2,4,6,8]", ["int64", "float"],
                            timeout=TIMEOUT)
    for row in sorted(rows, key=lambda item: item['int64']):
        print(row)
コード例 #10
0
    sorted_res = sorted(res, key=lambda k: k['int64'])
    for r in sorted_res:
        print(r)


# Script entry: read the target Milvus host from the command line.
parser = argparse.ArgumentParser(description='host ip')
parser.add_argument('--host', type=str, default='127.0.0.1', help='host ip')
args = parser.parse_args()
# Print the local start time of this run.
print(
    f"\nStart time: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))}"
)
# create connection
connections.connect(host=args.host, port="19530")
print("\nList collections...")
all_collections = list_collections()
print(all_collections)
# Keep only the collections whose name contains "Checker".
all_collections = [c_name for c_name in all_collections if "Checker" in c_name]
# Group the names by the text before their first underscore. The size check
# (`<= 5`) runs before appending, so each group can hold up to six names.
m = defaultdict(list)
for c_name in all_collections:
    prefix = c_name.split("_")[0]
    if len(m[prefix]) <= 5:
        m[prefix].append(c_name)
# Flatten the groups into a single list, group by group.
selected_collections = []
for v in m.values():
    selected_collections.extend(v)
print("selected_collections is")
print(selected_collections)
# NOTE(review): `cnt` is not used in the lines visible here — confirm whether
# the rest of the loop below (not shown in this excerpt) relies on it.
cnt = 0
for collection_name in selected_collections:
    print(f"check collection {collection_name}")
コード例 #11
0
def hello_milvus(host="127.0.0.1"):
    """Run a timed create/insert/index/load/search/query cycle on ``host``.

    Connects on port 19530, creates the ``hello_milvus`` collection
    (``count`` primary key, ``random_value`` double, 128-dim
    ``float_vector``), inserts 3000 rows, builds an IVF_FLAT index, loads
    the collection, searches with the last two inserted vectors and queries
    four primary keys. Elapsed times are printed along the way.

    Uses the module-level name ``random``.
    """
    import time

    # create connection
    connections.connect(host=host, port="19530")

    print("\nList collections...")
    print(list_collections())

    # create collection
    vector_dim = 128
    schema = CollectionSchema(
        fields=[
            FieldSchema(name="count", dtype=DataType.INT64, is_primary=True),
            FieldSchema(name="random_value", dtype=DataType.DOUBLE),
            FieldSchema(name="float_vector",
                        dtype=DataType.FLOAT_VECTOR,
                        dim=vector_dim),
        ],
        description="test collection",
    )

    print("\nCreate collection...")
    collection = Collection(name="hello_milvus", schema=schema)

    print("\nList collections...")
    print(list_collections())

    # insert data; the scalar columns are built inside the timed region
    row_count = 3000
    embeddings = [[random.random() for _ in range(vector_dim)]
                  for _ in range(row_count)]
    started = time.time()
    collection.insert([
        list(range(row_count)),
        [float(random.randrange(-20, -10)) for _ in range(row_count)],
        embeddings,
    ])
    elapsed = time.time() - started
    print(f"\nInsert {row_count} vectors cost {elapsed} seconds")

    started = time.time()
    print("\nGet collection entities...")
    print(collection.num_entities)
    elapsed = time.time() - started
    print(f"\nGet collection entities cost {elapsed} seconds")

    # create index and load table
    index_spec = {
        "index_type": "IVF_FLAT",
        "params": {"nlist": 128},
        "metric_type": "L2",
    }
    print("\nCreate index...")
    started = time.time()
    collection.create_index(field_name="float_vector", index_params=index_spec)
    elapsed = time.time() - started
    print(f"\nCreate index cost {elapsed} seconds")

    print("\nload collection...")
    started = time.time()
    collection.load()
    elapsed = time.time() - started
    print(f"\nload collection cost {elapsed} seconds")

    # search with the last two inserted vectors
    limit = 5
    params = {"metric_type": "L2", "params": {"nprobe": 10}}
    search_started = time.time()
    print("\nSearch...")
    results = collection.search(embeddings[-2:],
                                "float_vector",
                                params,
                                limit,
                                "count > 100",
                                output_fields=["count", "random_value"])
    search_elapsed = time.time() - search_started

    # show each hit together with its random_value field
    for query_hits in results:
        for single_hit in query_hits:
            print(single_hit, single_hit.entity.get("random_value"))
    print(f"search latency = {search_elapsed:.4f}s")

    # query four primary keys and print the rows ordered by key
    rows = collection.query("count in [2,4,6,8]", ["count", "random_value"])
    for row in sorted(rows, key=lambda item: item['count']):
        print(row)
コード例 #12
0
    expr = "int64 in [2,4,6,8]"
    output_fields = ["int64", "float"]
    res = collection.query(expr, output_fields, timeout=TIMEOUT)
    sorted_res = sorted(res, key=lambda k: k['int64'])
    for r in sorted_res:
        print(r)
    collection.release()


# Script entry: read the target Milvus host from the command line.
# NOTE(review): the default host is a hard-coded address — confirm it is
# still the intended default for this script.
parser = argparse.ArgumentParser(description='host ip')
parser.add_argument('--host', type=str, default='10.96.77.209', help='host ip')
args = parser.parse_args()
# Print the local start time of this run.
print(
    f"\nStart time: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))}"
)
# create connection
connections.connect(host=args.host, port="19530")
print(f"\nList collections...")
collection_list = list_collections()
print(collection_list)
# Check at most 10 collections whose name starts with "CreateChecker_"; any
# further ones with that prefix are skipped. Collections without the prefix
# are always checked.
cnt = 0
for collection_name in collection_list:
    if collection_name.startswith("CreateChecker_"):
        cnt += 1
    if collection_name.startswith("CreateChecker_") and cnt > 10:
        continue
    print(f"check collection {collection_name}")
    # NOTE(review): the hello_milvus definition used here is not visible in
    # this excerpt — confirm its first parameter is a collection name.
    hello_milvus(collection_name)
コード例 #13
0
ファイル: example.py プロジェクト: filip-halt/pymilvus
import random

from pymilvus import (connections, list_collections, FieldSchema,
                      CollectionSchema, DataType, Collection, Partition,
                      utility)

# Open a connection; connect() is called without arguments here, so no
# explicit hostname or port is configured in this script.
print(f"\nCreate connection...")
connections.connect()

# List all collection names
print(f"\nList collections...")
print(list_collections())

# Create a collection named 'demo_film_tutorial' with two fields:
# an INT64 primary key "release_year" and an 8-dimensional float vector
# "embedding".
print(f"\nCreate collection...")
field1 = FieldSchema(name="release_year",
                     dtype=DataType.INT64,
                     description="int64",
                     is_primary=True)
field2 = FieldSchema(name="embedding",
                     dtype=DataType.FLOAT_VECTOR,
                     description="float vector",
                     dim=8,
                     is_primary=False)
schema = CollectionSchema(fields=[field1, field2],
                          description="collection description")
# data=None: no rows are passed at construction time.
collection = Collection(name='demo_film_tutorial', data=None, schema=schema)

# List all collection names
print(f"\nList collections...")
コード例 #14
0
ファイル: hello_milvus.py プロジェクト: filip-halt/pymilvus
def hello_milvus():
    """Run a create/insert/index/load/search cycle and drop the collection.

    Connects with ``connections.connect()`` (no arguments), creates the
    ``hello_milvus`` collection (``count`` primary key, ``random_value``
    double, 128-dim ``float_vector``), inserts 3000 rows, builds an IVF_FLAT
    index, loads the collection, searches with the last two inserted
    vectors, prints the hits and the search latency, then drops the
    collection.

    Uses the module-level name ``random``.
    """
    import time

    # create connection
    connections.connect()

    print("\nList collections...")
    print(list_collections())

    # create collection
    vector_dim = 128
    schema = CollectionSchema(
        fields=[
            FieldSchema(name="count", dtype=DataType.INT64, is_primary=True),
            FieldSchema(name="random_value", dtype=DataType.DOUBLE),
            FieldSchema(name="float_vector",
                        dtype=DataType.FLOAT_VECTOR,
                        dim=vector_dim),
        ],
        description="test collection",
    )

    print("\nCreate collection...")
    collection = Collection(name="hello_milvus", schema=schema)

    print("\nList collections...")
    print(list_collections())

    # insert data: vectors first, then keys and random scalars
    row_count = 3000
    embeddings = [[random.random() for _ in range(vector_dim)]
                  for _ in range(row_count)]
    primary_keys = list(range(row_count))
    random_values = [
        float(random.randrange(-20, -10)) for _ in range(row_count)
    ]
    collection.insert([primary_keys, random_values, embeddings])

    print("\nGet collection entities...")
    print(collection.num_entities)

    # create index and load table
    index_spec = {
        "index_type": "IVF_FLAT",
        "params": {"nlist": 128},
        "metric_type": "L2",
    }
    print("\nCreate index...")
    collection.create_index(field_name="float_vector", index_params=index_spec)
    print("\nload collection...")
    collection.load()

    # search with the last two inserted vectors
    limit = 5
    params = {"metric_type": "L2", "params": {"nprobe": 10}}
    search_started = time.time()
    print("\nSearch...")
    results = collection.search(embeddings[-2:],
                                "float_vector",
                                params,
                                limit,
                                "count > 100",
                                output_fields=["count", "random_value"])
    search_elapsed = time.time() - search_started

    # show each hit together with its random_value field
    for query_hits in results:
        for single_hit in query_hits:
            print(single_hit, single_hit.entity.get("random_value"))
    print(f"search latency = {search_elapsed:.4f}s")

    # drop collection
    collection.drop()