Example #1
0
    def __init__(self,
                 data,
                 depth,
                 n_trees,
                 projection_sparsity='auto',
                 shape=None,
                 mmap=False):
        """
        Initializes an MRPT index object.

        All arguments are validated here and then handed to ``mrptlib.MrptIndex``.
        The object is flagged as not yet built (``self.built = False``).

        :param data: Input data either as a NxDim numpy ndarray or as a filepath to a binary file containing the data
        :param depth: The depth of the trees; must lie in [1, ceil(log2(N))]
        :param n_trees: The number of trees used in the index; must be at least 1
        :param projection_sparsity: Expected ratio of non-zero components in a projection matrix.
            ``'auto'`` selects 1 / sqrt(dim), ``None`` selects a ratio of 1, and any other value
            must be a number in (0, 1].
        :param shape: Shape of the data as a tuple (N, dim). Needs to be specified only if loading the data from a file.
        :param mmap: If true, the data is mapped into memory. Has effect only if the data is loaded from a file.
        :raises ValueError: If the data is not a valid float32 matrix or filepath, if the shape is
            missing or non-positive for file input, if depth, n_trees or projection_sparsity are out
            of range, or if memory mapping is requested on Windows.
        :return:
        """
        if isinstance(data, np.ndarray):
            # Check the rank before the size: len() raises TypeError on a 0-d
            # array, and data.size also rejects matrices with zero columns.
            if data.ndim != 2 or data.size == 0:
                raise ValueError(
                    "The data matrix should be non-empty and two-dimensional")
            if data.dtype != np.float32:
                raise ValueError("The data matrix should have type float32")
            if not data.flags['C_CONTIGUOUS'] or not data.flags['ALIGNED']:
                raise ValueError(
                    "The data matrix has to be C_CONTIGUOUS and ALIGNED")
            n_samples, dim = data.shape
        elif isinstance(data, str):
            if not isinstance(shape, tuple) or len(shape) != 2:
                raise ValueError(
                    "You must specify the shape of the data as a tuple (N, dim) "
                    "when loading data from a binary file")
            n_samples, dim = shape
            # Non-positive sizes would otherwise turn into NaN/inf in the depth
            # bound and the automatic sparsity computed below.
            if n_samples < 1 or dim < 1:
                raise ValueError("Both entries of shape must be positive")
        else:
            raise ValueError("Data must be either an ndarray or a filepath")

        # A tree cannot usefully be deeper than ceil(log2(N)) levels.
        max_depth = np.ceil(np.log2(n_samples))
        if not 1 <= depth <= max_depth:
            raise ValueError("Depth should be in range [1, %d]" % max_depth)

        if n_trees < 1:
            raise ValueError("Number of trees must be positive")

        if projection_sparsity == 'auto':
            projection_sparsity = 1. / np.sqrt(dim)
        elif projection_sparsity is None:
            projection_sparsity = 1
        elif not 0 < projection_sparsity <= 1:
            raise ValueError("Sparsity should be in (0, 1]")

        if mmap and os.name == 'nt':
            raise ValueError("Memory mapping is not available on Windows")

        self.index = mrptlib.MrptIndex(data, n_samples, dim, depth, n_trees,
                                       projection_sparsity, mmap)
        self.built = False
Example #2
0
    def __init__(self, data, shape=None, mmap=False):
        """
        Initializes an MRPT index object.

        When ``data`` is given, it is validated and wrapped in a ``mrptlib.MrptIndex``, and the
        dimension is stored in ``self.dim``. When ``data`` is ``None``, no index is created and
        only the ``built`` / ``autotuned`` flags are initialized.

        :param data: Input data either as a NxDim numpy ndarray or as a filepath to a binary file containing the data.
            May also be None, in which case no underlying index is created by this constructor.
        :param shape: Shape of the data as a tuple (N, dim). Needs to be specified only if loading the data from a file.
        :param mmap: If true, the data is mapped into memory. Has effect only if the data is loaded from a file.
        :raises ValueError: If the data is not a valid float32 matrix, filepath or None, if the shape is
            missing or non-positive for file input, or if memory mapping is requested on Windows.
        :return:
        """
        if isinstance(data, np.ndarray):
            # Check the rank before the size: len() raises TypeError on a 0-d
            # array, and data.size also rejects matrices with zero columns.
            if data.ndim != 2 or data.size == 0:
                raise ValueError(
                    "The data matrix should be non-empty and two-dimensional")
            if data.dtype != np.float32:
                raise ValueError("The data matrix should have type float32")
            if not data.flags['C_CONTIGUOUS'] or not data.flags['ALIGNED']:
                raise ValueError(
                    "The data matrix has to be C_CONTIGUOUS and ALIGNED")
            n_samples, dim = data.shape
        elif isinstance(data, str):
            if not isinstance(shape, tuple) or len(shape) != 2:
                raise ValueError(
                    "You must specify the shape of the data as a tuple (N, dim) "
                    "when loading data from a binary file")
            n_samples, dim = shape
            # Reject sizes that cannot describe a real data file before they
            # reach the native index.
            if n_samples < 1 or dim < 1:
                raise ValueError("Both entries of shape must be positive")
        elif data is not None:
            raise ValueError("Data must be either an ndarray or a filepath")

        if mmap and os_name == 'nt':
            raise ValueError("Memory mapping is not available on Windows")

        # With data=None the native index is left unset here.
        # NOTE(review): presumably it is populated later (e.g. by loading a
        # saved index) - confirm against the rest of the class.
        if data is not None:
            self.index = mrptlib.MrptIndex(data, n_samples, dim, mmap)
            self.dim = dim

        self.built = False
        self.autotuned = False