def _arrow_array_to_numpy(self, pa_array: pa.Array) -> np.ndarray:
    """Convert a pyarrow Array or ChunkedArray to a numpy ndarray.

    Chunked arrays are flattened chunk by chunk (calling ``to_numpy`` on the
    whole ChunkedArray would yield a dtype=object array). For
    ``_ArrayXDExtensionType`` columns, ``to_pylist`` is used instead so that
    dynamic (per-row) dimensions are supported. If the collected rows are
    ragged (differing shapes) or already object-dtype, the result falls back
    to ``dtype=object``.

    Args:
        pa_array: The pyarrow Array (or ChunkedArray) to convert.

    Returns:
        A numpy ndarray built with ``self.np_array_kwargs``.
    """
    zero_copy_only = _is_zero_copy_only(pa_array.type)
    if isinstance(pa_array, pa.ChunkedArray):
        # Don't call to_numpy() on the ChunkedArray directly or we end up
        # with a np.array with dtype object; call to_numpy on the chunks.
        # For ArrayExtensionArray, call to_pylist directly to support
        # dynamic dimensions.
        if isinstance(pa_array.type, _ArrayXDExtensionType):
            array: List = [
                row for chunk in pa_array.chunks for row in chunk.to_pylist()
            ]
        else:
            array: List = [
                row
                for chunk in pa_array.chunks
                for row in chunk.to_numpy(zero_copy_only=zero_copy_only)
            ]
    else:
        # Cast to a list of rows or we end up with a np.array with dtype
        # object. Same ArrayExtensionArray special case as above.
        if isinstance(pa_array.type, _ArrayXDExtensionType):
            array: List = pa_array.to_pylist()
        else:
            array: List = pa_array.to_numpy(
                zero_copy_only=zero_copy_only).tolist()
    if len(array) > 0:
        # FIX: `np.object` was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin `object` is the documented replacement and behaves
        # identically on older NumPy versions.
        # NOTE(review): under NumPy >= 2.0, `copy=False` raises when a copy
        # is unavoidable (as with ragged object arrays) — confirm the
        # supported NumPy range before bumping it.
        if any(
            isinstance(x, np.ndarray)
            and (x.dtype == object or x.shape != array[0].shape)
            for x in array
        ):
            return np.array(
                array,
                copy=False,
                **{**self.np_array_kwargs, "dtype": object},
            )
    return np.array(array, copy=False, **self.np_array_kwargs)
def add_input(self, accumulator: _PartialNLStats,
              feature_path: types.FeaturePath,
              feature_array: pa.Array) -> _PartialNLStats:
    """Fold a batch of feature values into the partial-stats accumulator.

    Args:
      accumulator: The current accumulator.
      feature_path: The path of the feature.
      feature_array: An arrow Array representing a batch of feature values
        which should be added to the accumulator.

    Returns:
      The accumulator after updating the statistics for the batch of inputs.
    """
    # Features outside the configured set invalidate the accumulator.
    if feature_path not in self._valid_feature_paths:
        accumulator.invalidate = True
        return accumulator

    feature_type = stats_util.get_feature_type_from_arrow_type(
        feature_path, feature_array.type)
    # A null array carries no information; leave the accumulator as-is.
    if feature_type is None:
        return accumulator
    # Unsupported feature types also invalidate the accumulator.
    if feature_type not in self._feature_type_fns:
        accumulator.invalidate = True
        return accumulator
    update_fn = self._feature_type_fns[feature_type]

    # Resolve the optional vocabulary pair configured for this feature.
    vocab = None
    rvocab = None
    vocab_name = self._nld_vocabularies[feature_path]
    if vocab_name:
        vocab = self._vocabs[vocab_name]
        rvocab = self._rvocabs[vocab_name]

    # Per-feature token configuration.
    excluded_string_tokens = self._nld_excluded_string_tokens[feature_path]
    excluded_int_tokens = self._nld_excluded_int_tokens[feature_path]
    oov_string_tokens = self._nld_oov_string_tokens[feature_path]
    int_tokens = self._nld_specified_int_tokens[feature_path]
    string_tokens = self._nld_specified_str_tokens[feature_path]
    sequence_length_excluded_int_tokens = (
        self._nld_sequence_length_excluded_int_tokens[feature_path])
    sequence_length_excluded_string_tokens = (
        self._nld_sequence_length_excluded_string_tokens[feature_path])

    # TODO(b/175875824): Benchmark and optimize performance.
    for row in feature_array.to_pylist():
        if row is None:
            continue
        update_fn(row, accumulator, excluded_string_tokens,
                  excluded_int_tokens, oov_string_tokens, vocab, rvocab,
                  int_tokens, string_tokens,
                  sequence_length_excluded_int_tokens,
                  sequence_length_excluded_string_tokens,
                  self._num_histogram_buckets)
    return accumulator
def _Normalize(array: pa.Array) -> pa.Array:
    """Round-trips `array` through Python objects.

    Comparing nested arrays containing slices is buggy in Arrow 2.0, so this
    helper is useful for checking two such arrays for logical equality. The
    bug appears to be fixed as of Arrow 5.0; this should be removable once
    that becomes the minimum version.

    Args:
      array: The array to normalize.

    Returns:
      An array with no remaining zero-copy slices in itself or its children.
      Note the schema might be slightly different for all-null arrays.
    """
    as_python_objects = array.to_pylist()
    return pa.array(as_python_objects)