Example #1
0
    def SearchByID(self, request, context):
        """Run a top-k search keyed by ids instead of query vectors.

        The request is validated, the collection is looked up (described
        through the router and cached on a cache miss), and the search is then
        forwarded to ``search_by_ids`` on the routed connection.

        Args:
            request: incoming request. This method reads ``collection_name``,
                ``topk``, ``extra_params`` and, when present,
                ``partition_tag_array`` and ``id_array``.
            context: call context; not used by this method.

        Returns:
            The value returned by ``search_by_ids`` on the routed connection.

        Raises:
            exceptions.SearchParamError: ``request.extra_params`` is empty.
            exceptions.InvalidTopKError: ``topk`` is not in ``(0, MAX_TOPK]``.
            exceptions.CollectionNotFoundError: the collection could not be
                described through the router.
        """
        metadata = {'resp_class': milvus_pb2.TopKQueryResult}

        collection_name = request.collection_name
        topk = request.topk

        if len(request.extra_params) == 0:
            raise exceptions.SearchParamError(message="Search param loss",
                                              metadata=metadata)
        # Only the first extra param is consulted; its value is a JSON document.
        params = ujson.loads(str(request.extra_params[0].value))

        logger.info('Search {}: topk={} params={}'.format(
            collection_name, topk, params))

        if topk > self.MAX_TOPK or topk <= 0:
            raise exceptions.InvalidTopKError(
                message='Invalid topk: {}'.format(topk), metadata=metadata)

        # The lookup doubles as an existence check: an unknown collection is
        # described once through the router and remembered for later calls.
        if not self.collection_meta.get(collection_name, None):
            status, info = self.router.connection(
                metadata=metadata).describe_collection(collection_name)
            if not status.OK():
                raise exceptions.CollectionNotFoundError(collection_name,
                                                         metadata=metadata)
            self.collection_meta[collection_name] = info

        start = time.time()

        # A search by id carries no query vectors, so none are decoded here.
        partition_tags = getattr(request, "partition_tag_array", [])
        ids = getattr(request, "id_array", [])
        search_result = self.router.connection(
            metadata=metadata).search_by_ids(collection_name, ids, topk,
                                             partition_tags, params)

        logger.info('SearchVector takes: {}'.format(time.time() - start))
        return search_result
Example #2
0
    def Search(self, request, context):
        """Run a top-k vector search and wrap the result in a TopKQueryResult.

        The request is validated, the collection metadata is fetched (described
        through the router and cached on a cache miss), the query vectors are
        decoded according to the collection's metric type, and the query is
        delegated to ``self._do_query``.

        Args:
            request: incoming request. This method reads ``collection_name``,
                ``topk``, ``extra_params``, ``query_record_array`` and, when
                present, ``partition_tag_array``.
            context: call context; passed through to ``self._do_query``.

        Returns:
            milvus_pb2.TopKQueryResult: carries the status, ids and distances
            returned by ``self._do_query``. ``row_num`` is the number of query
            records, or 0 when no ids came back.

        Raises:
            exceptions.SearchParamError: ``request.extra_params`` is empty.
            exceptions.InvalidTopKError: ``topk`` is not in ``(0, MAX_TOPK]``.
            exceptions.CollectionNotFoundError: the collection could not be
                described through the router.
        """
        metadata = {'resp_class': milvus_pb2.TopKQueryResult}

        collection_name = request.collection_name
        topk = request.topk

        if len(request.extra_params) == 0:
            raise exceptions.SearchParamError(message="Search param loss",
                                              metadata=metadata)
        # Only the first extra param is consulted; its value is a JSON document.
        params = ujson.loads(str(request.extra_params[0].value))

        logger.info('Search {}: topk={} params={}'.format(
            collection_name, topk, params))

        if topk > self.MAX_TOPK or topk <= 0:
            raise exceptions.InvalidTopKError(
                message='Invalid topk: {}'.format(topk), metadata=metadata)

        collection_meta = self.collection_meta.get(collection_name, None)
        if not collection_meta:
            # Cache miss: describe the collection once and remember the answer.
            status, info = self.router.connection(
                metadata=metadata).describe_collection(collection_name)
            if not status.OK():
                raise exceptions.CollectionNotFoundError(collection_name,
                                                         metadata=metadata)
            self.collection_meta[collection_name] = info
            collection_meta = info

        start = time.time()

        # Metric types numbered HAMMING and above read the binary payload of
        # each query record; every other metric type reads the float payload.
        if int(collection_meta.metric_type) >= MetricType.HAMMING.value:
            query_record_array = [
                bytes(record.binary_data)
                for record in request.query_record_array
            ]
        else:
            query_record_array = [
                list(record.float_data)
                for record in request.query_record_array
            ]

        status, id_results, dis_results = self._do_query(
            context,
            collection_name,
            collection_meta,
            query_record_array,
            topk,
            params,
            partition_tags=getattr(request, "partition_tag_array", []),
            metadata=metadata)

        logger.info('SearchVector takes: {}'.format(time.time() - start))

        return milvus_pb2.TopKQueryResult(
            status=status_pb2.Status(error_code=status.error_code,
                                     reason=status.reason),
            row_num=len(request.query_record_array) if len(id_results) else 0,
            ids=id_results,
            distances=dis_results)
    def _route(self, collection_name, range_array, partition_tags=None, metadata=None, **kwargs):
        """Map the searchable files of a collection onto readonly servers.

        The metadata tables are queried for the collection (and its
        partitions), then for their files of type RAW, TO_INDEX or INDEX, and
        every file id is assigned to a server with a hash ring.

        Args:
            collection_name: name of the collection to route.
            range_array: accepted but not used by this method.
            partition_tags: optional partition tags. Each tag is applied with
                ``re.match``, so it acts as a regular expression anchored at
                the start of the stored partition tag. ``'_default'`` also
                selects the collection's own row.
            metadata: passed to the exceptions raised here.
            **kwargs: accepted but not used by this method.

        Returns:
            dict: server name -> list of file ids (as strings) assigned to it.

        Raises:
            exceptions.DBError: a metadata query failed.
            exceptions.CollectionNotFoundError: no matching, non-deleted
                collection or partition row exists.
        """
        # PXU TODO: Implement Thread-local Context

        if not partition_tags:
            cond = and_(
                or_(Tables.table_id == collection_name, Tables.owner_table == collection_name),
                Tables.state != Tables.TO_DELETE)
        else:
            # TODO: collection default partition is '_default'
            cond = and_(Tables.state != Tables.TO_DELETE,
                        Tables.owner_table == collection_name)
            if '_default' in partition_tags:
                default_par_cond = and_(Tables.table_id == collection_name, Tables.state != Tables.TO_DELETE)
                cond = or_(cond, default_par_cond)

        # Release the session on every exit path, including the error raises
        # below, so a failed lookup does not leave it behind.
        try:
            try:
                collections = db.Session.query(Tables).filter(cond).all()
            except sqlalchemy_exc.SQLAlchemyError as e:
                raise exceptions.DBError(message=str(e), metadata=metadata) from e

            if not collections:
                logger.error("Cannot find collection {} / {} in metadata".format(collection_name, partition_tags))
                raise exceptions.CollectionNotFoundError('{}:{}'.format(collection_name, partition_tags),
                                                         metadata=metadata)

            if not partition_tags:
                collection_list = [str(collection.table_id) for collection in collections]
            else:
                collection_list = []
                for collection in collections:
                    if collection.table_id == collection_name:
                        # The collection's own row is kept without tag matching.
                        collection_list.append(collection_name)
                        continue
                    if any(re.match(tag, collection.partition_tag) for tag in partition_tags):
                        collection_list.append(collection.table_id)

            file_type_cond = or_(
                TableFiles.file_type == TableFiles.FILE_TYPE_RAW,
                TableFiles.file_type == TableFiles.FILE_TYPE_TO_INDEX,
                TableFiles.file_type == TableFiles.FILE_TYPE_INDEX,
            )
            file_cond = and_(file_type_cond, TableFiles.table_id.in_(collection_list))
            try:
                files = db.Session.query(TableFiles).filter(file_cond).all()
            except sqlalchemy_exc.SQLAlchemyError as e:
                raise exceptions.DBError(message=str(e), metadata=metadata) from e

            if not files:
                # An empty file set is only logged; an empty routing is returned.
                logger.warning("Collection file is empty. {}".format(collection_list))
        finally:
            db.remove_session()

        servers = self.readonly_topo.group_names
        logger.info('Available servers: {}'.format(list(servers)))

        ring = HashRing(servers)

        routing = {}
        for f in files:
            file_id = str(f.id)
            routing.setdefault(ring.get_node(file_id), []).append(file_id)

        return routing
Example #4
0
    def _route(self,
               collection_name,
               range_array,
               partition_tags=None,
               metadata=None,
               **kwargs):
        """Choose the segment files to search and spread them over readonly nodes.

        The router, rather than the readonly nodes, filters out segment files
        whose status is not suitable for searching: the writable node may
        update a segment file's status, and in a mishards cluster the metadata
        is what keeps that status consistent.

        Args:
            collection_name: name of the collection to route.
            range_array: accepted but not used by this method.
            partition_tags: optional partition tags. Each tag is applied with
                ``re.match``, so it acts as a regular expression anchored at
                the start of the stored partition tag. ``'_default'`` also
                selects the collection's own row.
            metadata: passed to the exceptions raised here.
            **kwargs: accepted but not used by this method.

        Returns:
            dict: host -> ``(search_files, ud_files)``. ``search_files`` is the
            list of file ids (as strings) hashed to that host; ``ud_files`` is
            the result of ``filter_file_to_update`` for that host's
            ``(file_id, updated_time)`` pairs.

        Raises:
            exceptions.DBError: a metadata query failed.
            exceptions.CollectionNotFoundError: no matching, non-deleted
                collection or partition row exists.
        """
        # PXU TODO: Implement Thread-local Context

        # Select all available partitions from metadata.
        if not partition_tags:
            cond = and_(
                or_(Tables.table_id == collection_name,
                    Tables.owner_table == collection_name),
                Tables.state != Tables.TO_DELETE)
        else:
            # TODO: collection default partition is '_default'
            cond = and_(Tables.state != Tables.TO_DELETE,
                        Tables.owner_table == collection_name)
            if '_default' in partition_tags:
                # There is no partition entity for the default partition in the
                # meta table; it corresponds to the collection entity itself.
                default_par_cond = and_(Tables.table_id == collection_name,
                                        Tables.state != Tables.TO_DELETE)
                cond = or_(cond, default_par_cond)

        # Release the session on every exit path, including the error raises
        # below, so a failed lookup does not leave it behind.
        try:
            try:
                collections = db.Session.query(Tables).filter(cond).all()
            except sqlalchemy_exc.SQLAlchemyError as e:
                raise exceptions.DBError(message=str(e),
                                         metadata=metadata) from e

            if not collections:
                logger.error(
                    "Cannot find collection {} / {} in metadata during routing. Meta url: {}"
                    .format(collection_name, partition_tags, db.url))
                raise exceptions.CollectionNotFoundError(
                    "{}:{} not found in metadata".format(collection_name,
                                                         partition_tags),
                    metadata=metadata)

            if not partition_tags:
                collection_list = [
                    str(collection.table_id) for collection in collections
                ]
            else:
                collection_list = []
                for collection in collections:
                    if collection.table_id == collection_name:
                        # The collection's own row is kept without tag matching.
                        collection_list.append(collection_name)
                        continue
                    # Tags are matched as regular expressions.
                    if any(
                            re.match(tag, collection.partition_tag)
                            for tag in partition_tags):
                        collection_list.append(collection.table_id)

            file_type_cond = or_(
                TableFiles.file_type == TableFiles.FILE_TYPE_RAW,
                TableFiles.file_type == TableFiles.FILE_TYPE_TO_INDEX,
                TableFiles.file_type == TableFiles.FILE_TYPE_INDEX,
            )
            file_cond = and_(file_type_cond,
                             TableFiles.table_id.in_(collection_list))
            try:
                files = db.Session.query(TableFiles).filter(file_cond).all()
            except sqlalchemy_exc.SQLAlchemyError as e:
                raise exceptions.DBError(message=str(e),
                                         metadata=metadata) from e

            if not files:
                # An empty file set is only logged; an empty routing is returned.
                logger.warning(
                    "Collection file is empty. {}".format(collection_list))
        finally:
            db.remove_session()

        # Use consistent hashing to route segment files: the ring's nodes are
        # the readonly nodes and the hashed items are segment file ids.
        servers = self.readonly_topo.group_names
        logger.info('Available servers: {}'.format(list(servers)))

        ring = HashRing(servers)

        routing = {}
        for f in files:
            file_id = str(f.id)
            routing.setdefault(ring.get_node(file_id), []).append(
                (file_id, int(f.updated_time)))

        # Check which of each host's files need to be updated.
        filter_routing = {}
        for host, host_files in routing.items():
            ud_files = filter_file_to_update(host, host_files)
            search_files = [entry[0] for entry in host_files]
            filter_routing[host] = (search_files, ud_files)

        return filter_routing