Beispiel #1
0
    def test_search_step(self):
        """Run ``steps.search_step`` for one query and check the first stored result.

        The items of the input SQLite store are aggregated and handed to an
        ``ExhaustiveSearcher`` together with the ids 1..10.  A single query row
        (``np.arange(8)`` reshaped to one row) is searched with ``10`` as the
        third argument (presumably the number of neighbours -- confirm against
        ``steps.search_step``), and the first item written to the output store
        must equal the ids 1..10.
        """
        case = SQLTabelDataStoreComputeDescriptorsTestCase
        source_ds, result_ds = get_two_sqlite_data_stores(
            case.db_path, case.table_name_in, case.table_name_out, "ndarray")
        expected_ids = np.arange(1, 11)

        with source_ds:
            # Materialise the stored items so the searcher receives one array.
            base_items = aggregate_iterable(
                source_ds.get_items_sorted_by_ids(), source_ds.get_count())
            searcher_ = exhaustive_searcher.ExhaustiveSearcher(
                base_items, np.arange(1, 11))

            query_ds = numpy_datastore.NumpyDataStore(
                np.arange(8).reshape((1, -1)))

            steps.search_step(query_ds, searcher_, 10, result_ds, force=True)

            with result_ds:
                # Only the first stored row is inspected.
                first_row = next(result_ds.get_items_sorted_by_ids())
                self.assertTrue(np.array_equal(expected_ids, first_row))
    def get_items_sorted_by_ids(self, ids_sorted: np.ndarray = None):
        """Return the wrapped store's items as one array, optionally reshaped and sliced.

        If the wrapped store is a context manager it is entered for the
        duration of the call.  A non-stream store is queried directly; a
        stream store is drained through ``aggregate_iterable``.

        Args:
            ids_sorted: Optional array of ids to fetch.  For a stream store it
                is flattened and passed on as an iterator; ``None`` requests
                all items.

        Returns:
            The items, reshaped to ``self.shape`` when that is set and indexed
            with ``self.slice_get`` when that is set.
        """
        with ExitStack() as stack:
            if hasattr(self.stream_data_store, '__enter__'):
                stack.enter_context(self.stream_data_store)

            if not self.stream_data_store.is_stream_data_store():
                items_sorted_by_ids = self.stream_data_store.get_items_sorted_by_ids(
                    ids_sorted)
            else:
                ids_sorted_stream = None
                if ids_sorted is not None:
                    # Count AFTER flattening: one item is streamed per id, so
                    # a non-1-D id array (e.g. shape (1, n)) must contribute
                    # n elements, not shape[0].
                    ids_sorted = ids_sorted.ravel()
                    count_ = ids_sorted.size
                    ids_sorted_stream = iter(ids_sorted)
                else:
                    count_ = self.stream_data_store.get_count()

                items_sorted_by_ids_stream = self.stream_data_store.get_items_sorted_by_ids(
                    ids_sorted_stream)
                items_sorted_by_ids = aggregate_iterable(
                    items_sorted_by_ids_stream,
                    detect_final_shape_by_first_elem=self.
                    detect_final_shape_by_first_elem,
                    final_shape=self.shape,
                    n_elements=count_)

            if self.shape is not None:
                items_sorted_by_ids = items_sorted_by_ids.reshape(self.shape)
            if self.slice_get is not None:
                items_sorted_by_ids = items_sorted_by_ids[self.slice_get]

            return items_sorted_by_ids
Beispiel #3
0
    def chunkify(self, chunk_size=-1):
        """Replace ``self.data_stream`` with a lazy stream of aggregated chunks.

        Does nothing when the stream is already chunkified.  Otherwise the
        chunk sizes come from ``self.chunk_sizes_(chunk_size)`` and every
        chunk is produced by ``aggregate_iterable`` pulling from one shared
        iterator over the original stream.

        Args:
            chunk_size: Forwarded to ``self.chunk_sizes_``; defaults to ``-1``.
        """
        if self.is_stream_chunkified:
            return

        sizes = self.chunk_sizes_(chunk_size)
        # A single shared iterator, so each chunk continues where the previous
        # one stopped.
        source_iter = iter(self.data_stream)

        self.data_stream = (
            aggregate_iterable(source_iter,
                               detect_final_shape_by_first_elem=True,
                               n_elements=size)
            for size in sizes
        )
        self.is_stream_chunkified = True
    def wrapper(*args):
        """Call ``func`` with every non-ndarray iterable argument aggregated first.

        Arguments that are already ``np.ndarray`` or are not iterable are
        passed through unchanged; all others go through
        ``aggregate_iterable``.  Note that ``str`` and ``bytes`` are iterable
        and are therefore aggregated as well.

        Returns:
            Whatever ``func`` returns for the converted arguments.
        """
        # ``collections.Iterable`` was removed in Python 3.10; the ABC lives in
        # ``collections.abc``.  Imported locally so the fix does not depend on
        # the file's import block.
        from collections.abc import Iterable

        new_args = [
            aggregate_iterable(arg)
            if not isinstance(arg, np.ndarray) and isinstance(arg, Iterable)
            else arg
            for arg in args
        ]
        return func(*new_args)
    def get_ids_sorted(self):
        """Return the sorted ids of the wrapped data store.

        If the wrapped store is a context manager it is entered for the
        duration of the call.  A non-stream store's result is returned as is;
        a stream store's ids are collected with ``aggregate_iterable``, using
        the store's ``get_count()`` as the number of elements.
        """
        store = self.stream_data_store
        with ExitStack() as stack:
            if hasattr(store, '__enter__'):
                stack.enter_context(store)

            if store.is_stream_data_store():
                id_stream = store.get_ids_sorted()
                total = store.get_count()
                return aggregate_iterable(id_stream, n_elements=total)

            return store.get_ids_sorted()
    def save_items_sorted_by_ids(self, items_sorted_by_ids: np.ndarray, ids_sorted: np.ndarray = None):
        """Store items and their ids on the instance as numpy arrays.

        Args:
            items_sorted_by_ids: An ``np.ndarray``, a list, or any other
                iterable of items.  ``None`` makes the call a no-op.
            ids_sorted: Ids matching the items.  When omitted, the ids
                ``1..n`` are generated, with ``n`` the number of items.  A
                non-ndarray value is converted with ``aggregate_iterable``.

        Side effects:
            Sets ``self.items_sorted_by_ids`` and ``self.ids_sorted``.
        """
        if items_sorted_by_ids is None:
            return

        if ids_sorted is not None:
            if not isinstance(ids_sorted, np.ndarray):
                ids_sorted = aggregate_iterable(ids_sorted)
        elif isinstance(items_sorted_by_ids, (list, np.ndarray)):
            if isinstance(items_sorted_by_ids, list):
                items_sorted_by_ids = np.asarray(items_sorted_by_ids)
            ids_sorted = np.arange(1, len(items_sorted_by_ids) + 1)
        else:
            # A generic iterable has no length: duplicate it and exhaust the
            # copy to count the items while keeping the original usable.
            items_sorted_by_ids, counting_copy = itertools.tee(items_sorted_by_ids)
            n_items = sum(1 for _ in counting_copy)
            ids_sorted = np.arange(1, n_items + 1)

        if not isinstance(items_sorted_by_ids, np.ndarray):
            items_sorted_by_ids = aggregate_iterable(
                items_sorted_by_ids,
                detect_final_shape_by_first_elem=True,
                n_elements=len(ids_sorted))

        self.items_sorted_by_ids = items_sorted_by_ids
        self.ids_sorted = ids_sorted
Beispiel #7
0
    def test_quantize_shape_only(self):
        """Run ``steps.quantize_step`` with a PQ quantizer and check the output shape.

        The quantizer is built with ``n_clusters=4`` and ``n_quantizers=2``;
        the aggregated cluster centres read back from the output store must
        have shape ``(2, 4, 4)``.
        """
        sqltable_in_ds, sqltable_out_ds = get_two_sqlite_data_stores(
            SQLTabelDataStoreComputeDescriptorsTestCase.db_path,
            SQLTabelDataStoreComputeDescriptorsTestCase.table_name_in,
            SQLTabelDataStoreComputeDescriptorsTestCase.table_name_out,
            "ndarray")

        quantizer = pq_quantizer.PQQuantizer(n_clusters=4, n_quantizers=2)
        steps.quantize_step(sqltable_in_ds,
                            quantizer,
                            sqltable_out_ds,
                            force=True)

        with sqltable_out_ds:
            cluster_centers = sqltable_out_ds.get_items_sorted_by_ids()
            cluster_centers_ndarray = aggregate_iterable(
                cluster_centers, detect_final_shape_by_first_elem=True)
            truth_shape = (2, 4, 4)
            # ``assertEquals`` is a deprecated alias that was removed in
            # Python 3.12; ``assertEqual`` is the supported spelling.
            self.assertEqual(cluster_centers_ndarray.shape, truth_shape)
Beispiel #8
0
def aggregate_items(foreignid_itemsiterable):
    """Aggregate the items belonging to one foreign id.

    Args:
        foreignid_itemsiterable: Indexable pair whose element 0 is the foreign
            id and whose element 1 is an iterable of indexable entries; index
            1 of every entry is taken as the item.

    Returns:
        A tuple ``(foreign_id, aggregated_items)`` where the items were
        combined by ``ai.aggregate_iterable`` with the final shape detected
        from the first element.
    """
    foreign_id = foreignid_itemsiterable[0]
    # Lazily strip each entry down to its item, which sits at index 1.
    items_only = (entry[1] for entry in foreignid_itemsiterable[1])
    combined = ai.aggregate_iterable(
        items_only, detect_final_shape_by_first_elem=True)
    return (foreign_id, combined)