コード例 #1
0
    def store_shard(self,
                    ds_in: Iterable,
                    ds_out: Dataset,
                    offset: int,
                    token=None):
        """
        Compute one shard of ``ds_in`` and upload the results into ``ds_out``.

        Every item of ``ds_in`` is passed through ``self.call_func`` (via
        ``self.map``), the results are unwrapped, flattened against
        ``self.schema`` and regrouped with ``self._split_list_to_dicts``.
        ``ds_out`` is grown with ``append_shape`` when the shard would run
        past its current first dimension, then the regrouped results are
        uploaded into ``ds_out[offset:offset + n_results]``.

        Args:
            ds_in: Iterable of input items; it is fully materialized first.
            ds_out: Destination dataset that receives the results.
            offset: Index in ``ds_out`` at which this shard starts.
            token: Forwarded unchanged to ``self.upload``.

        Returns:
            The number of results stored, taken from the length of the first
            value of the regrouped results; 0 when there is nothing to store.
        """
        def _compute(sample):
            # Dataset / DatasetView items are replaced by their .numpy() value
            # before the user function sees them.
            if isinstance(sample, (DatasetView, Dataset)):
                sample = sample.numpy()
            # If the iterable obtained from iterating ds_in is a list, it is
            # not treated as list
            return self.call_func(0, sample)

        samples = list(ds_in)
        computed = self._unwrap(self.map(_compute, samples))
        flattened = list(
            self.map(lambda x: self._flatten_dict(x, schema=self.schema),
                     computed))
        grouped = self._split_list_to_dicts(flattened)

        # Nothing to store when the regrouped results have no entries at all.
        grouped_values = list(grouped.values())
        if not grouped_values:
            return 0

        # The first entry's length is used as the shard's result count.
        n_results = len(grouped_values[0])
        if not n_results:
            return 0

        # Extend ds_out only by the amount the shard overshoots its current size.
        end = offset + n_results
        additional = max(end - ds_out.shape[0], 0)
        ds_out.append_shape(additional)

        self.upload(grouped, ds_out[offset:end], token=token)
        return n_results
コード例 #2
0
ファイル: transform.py プロジェクト: thomascherickal/Hub
    def store_shard(self,
                    ds_in: Iterable,
                    ds_out: Dataset,
                    offset: int,
                    token=None):
        """
        Compute one shard of ``ds_in`` and upload the results into ``ds_out``.

        Every item of ``ds_in`` is passed to ``self._func`` together with
        ``self.kwargs`` (via ``self.map``), the results are unwrapped,
        flattened against ``self.schema`` and regrouped with
        ``self._split_list_to_dicts``. ``ds_out`` is grown with
        ``append_shape`` when the shard would run past its current first
        dimension, then the regrouped results are uploaded into
        ``ds_out[offset:offset + n_results]``.

        Args:
            ds_in: Iterable of input items; it is fully materialized first.
            ds_out: Destination dataset that receives the results.
            offset: Index in ``ds_out`` at which this shard starts.
            token: Forwarded unchanged to ``self.upload``.

        Returns:
            The number of results stored, taken from the length of the first
            value of the regrouped results; 0 when there is nothing to store.
        """
        def _compute(sample):
            # Apply the user function with the transform's stored kwargs.
            return self._func(sample, **self.kwargs)

        samples = list(ds_in)
        computed = self._unwrap(self.map(_compute, samples))
        flattened = list(
            self.map(lambda x: self._flatten_dict(x, schema=self.schema),
                     computed))
        grouped = self._split_list_to_dicts(flattened)

        # Nothing to store when the regrouped results have no entries at all.
        grouped_values = list(grouped.values())
        if not grouped_values:
            return 0

        # The first entry's length is used as the shard's result count.
        n_results = len(grouped_values[0])
        if not n_results:
            return 0

        # Extend ds_out only by the amount the shard overshoots its current size.
        end = offset + n_results
        additional = max(end - ds_out.shape[0], 0)
        ds_out.append_shape(additional)

        self.upload(grouped, ds_out[offset:end], token=token)
        return n_results