def process(self, examples: List[bytes]) -> Iterator[pa.RecordBatch]:
    """Decodes a batch of serialized examples into an Arrow RecordBatch.

    Args:
      examples: A batch of serialized (bytes) examples to decode.

    Yields:
      A single `pa.RecordBatch` containing the decoded examples. If
      `self._raw_record_column_name` is set, the RecordBatch additionally
      carries a column of that name holding the raw serialized records.
    """
    decoded = self._decoder.DecodeBatch(examples)
    if self._raw_record_column_name is None:
        yield decoded
    else:
        # Attach the original serialized records as an extra column so
        # downstream consumers can access the raw bytes alongside the
        # decoded features.
        yield record_based_tfxio.AppendRawRecordColumn(
            decoded, self._raw_record_column_name, examples)
def testAppendRawRecordColumn(
    self, input_record_batch, raw_records, expected_raw_record_column,
    record_index_column_name=None):
    """Checks AppendRawRecordColumn with large types enabled.

    Verifies that the output RecordBatch has exactly one extra column, that
    all original columns are unchanged, and that the appended column has the
    expected name and contents.
    """
    column_name = "raw_record"
    output_record_batch = record_based_tfxio.AppendRawRecordColumn(
        record_batch=input_record_batch,
        column_name=column_name,
        raw_records=raw_records,
        produce_large_types=True,
        record_index_column_name=record_index_column_name)
    self.assertEqual(
        output_record_batch.num_columns, input_record_batch.num_columns + 1)
    # All pre-existing columns must pass through untouched.
    for i in range(input_record_batch.num_columns):
        self.assertTrue(
            input_record_batch.column(i).equals(output_record_batch.column(i)))
    # The raw record column is appended last, under the requested name.
    self.assertEqual(
        output_record_batch.schema.names[output_record_batch.num_columns - 1],
        column_name)
    self.assertTrue(
        output_record_batch.column(output_record_batch.num_columns - 1)
        .equals(expected_raw_record_column))
def process(self, records: List[bytes]) -> Iterator[pa.RecordBatch]:
    """Decodes serialized records and yields them as one Arrow RecordBatch.

    When `self._raw_record_column_name` is set, the yielded RecordBatch also
    carries the raw serialized records in a column of that name.
    """
    decoded_tensors = self._decoder.decode_record(records)
    record_batch = self._tensors_to_record_batch_converter.convert(
        decoded_tensors)
    if self._raw_record_column_name is None:
        yield record_batch
        return
    yield record_based_tfxio.AppendRawRecordColumn(
        record_batch, self._raw_record_column_name, records,
        self._produce_large_raw_record_column)
def process(self, records: List[bytes]) -> Iterator[pa.RecordBatch]:
    """Runs the decode function over the records and yields a RecordBatch.

    When `self._raw_record_column_name` is set, the yielded RecordBatch also
    carries the raw serialized records in a column of that name.
    """
    records_tensor = tf.convert_to_tensor(records, dtype=tf.string)
    record_batch = self._tensors_to_record_batch_converter.convert(
        self._decode_fn(records_tensor))
    if self._raw_record_column_name is None:
        yield record_batch
        return
    yield record_based_tfxio.AppendRawRecordColumn(
        record_batch, self._raw_record_column_name, records,
        self._record_index_column_name)
def RecordBatches(
    self, options: dataset_options.RecordBatchesOptions
) -> Iterator[pa.RecordBatch]:
    """Yields decoded RecordBatches read from the TFRecord files.

    Args:
      options: Batching/shuffling options used to build the tf.data pipeline.

    Yields:
      One `pa.RecordBatch` per batch read from `self._file_pattern`. If
      `self._raw_record_column_name` is set, each RecordBatch additionally
      carries the raw serialized records in a column of that name.
    """
    dataset = dataset_util.make_tf_record_dataset(
        self._file_pattern, options.batch_size, options.drop_final_batch,
        options.num_epochs, options.shuffle, options.shuffle_buffer_size,
        options.shuffle_seed)
    decoder = example_coder.ExamplesToRecordBatchDecoder(
        self._schema.SerializeToString())
    for serialized_batch in dataset.as_numpy_iterator():
        record_batch = decoder.DecodeBatch(serialized_batch)
        if self._raw_record_column_name is None:
            yield record_batch
        else:
            yield record_based_tfxio.AppendRawRecordColumn(
                record_batch, self._raw_record_column_name,
                serialized_batch.tolist())
def _readDatasetIntoBatchedExtracts(self):
    """Read the raw dataset and massage examples into batched Extracts."""
    serialized_examples = list(
        self._dataset.read_raw_dataset(
            deserialize=False, limit=self._max_num_examples()))
    # TODO(b/153996019): Once the TFXIO interface that returns an iterator of
    # RecordBatch is available, clean this up.
    coder = example_coder.ExamplesToRecordBatchDecoder(
        serialized_schema=benchmark_utils.read_schema(
            self._dataset.tf_metadata_schema_path()).SerializeToString())
    extracts = []
    for start in range(0, len(serialized_examples), _BATCH_SIZE):
        example_batch = serialized_examples[start:start + _BATCH_SIZE]
        # Decode the slice and attach the raw serialized records as the
        # designated input column.
        record_batch = record_based_tfxio.AppendRawRecordColumn(
            coder.DecodeBatch(example_batch), constants.ARROW_INPUT_COLUMN,
            example_batch)
        extracts.append({constants.ARROW_RECORD_BATCH_KEY: record_batch})
    return extracts
def testAppendRawRecordColumn(
    self, input_record_batch, raw_records, expected_raw_record_column,
    record_index_column_name=None):
    """Checks AppendRawRecordColumn's output shape, names and contents.

    Verifies that exactly one column is appended, that every original column
    passes through unchanged, and that the appended column has the expected
    name and contents.
    """
    column_name = "raw_record"
    output_record_batch = record_based_tfxio.AppendRawRecordColumn(
        record_batch=input_record_batch,
        column_name=column_name,
        raw_records=raw_records,
        record_index_column_name=record_index_column_name)
    num_input_columns = input_record_batch.num_columns
    self.assertEqual(output_record_batch.num_columns, num_input_columns + 1)
    # Original columns must be carried over untouched.
    for idx in range(num_input_columns):
        self.assertTrue(
            input_record_batch.column(idx).equals(
                output_record_batch.column(idx)))
    # The appended column sits last, under the requested name.
    last_idx = output_record_batch.num_columns - 1
    self.assertEqual(output_record_batch.schema.names[last_idx], column_name)
    self.assertTrue(
        output_record_batch.column(last_idx).equals(
            expected_raw_record_column))