コード例 #1
0
 def testRecordBatchesWithRawRecords(self):
     """Validates record batches produced with a raw-record column requested.

     Builds the TFXIO with `_SCHEMA` and the raw-record column name, reads a
     single unshuffled epoch with a batch size of `len(_EXAMPLES)`, and hands
     every yielded batch (together with the raw-record column name) to
     `self._ValidateRecordBatch`.
     """
     raw_example_column_name = "raw_records"
     tfxio = self._MakeTFXIO(_SCHEMA, raw_example_column_name)
     options = dataset_options.RecordBatchesOptions(
         batch_size=len(_EXAMPLES), shuffle=False, num_epochs=1)
     num_batches = 0
     for record_batch in tfxio.RecordBatches(options):
         num_batches += 1
         self._ValidateRecordBatch(tfxio, record_batch,
                                   raw_example_column_name)
     # Guard against a vacuous pass: with no batches the loop body (and thus
     # every validation) would be skipped and the test would still succeed.
     self.assertGreater(num_batches, 0)
コード例 #2
0
 def testRecordBatchesWithProject(self):
     """Validates record batches produced by a TFXIO projected to one feature.

     Projects the TFXIO built from `_SCHEMA` down to "string_feature", reads a
     single unshuffled epoch with a batch size of `len(_EXAMPLES)`, and checks
     that each yielded batch validates against the projected TFXIO and
     contains exactly that one column.
     """
     tfxio = self._MakeTFXIO(_SCHEMA)
     feature_name = "string_feature"
     projected_tfxio = tfxio.Project([feature_name])
     options = dataset_options.RecordBatchesOptions(
         batch_size=len(_EXAMPLES), shuffle=False, num_epochs=1)
     num_batches = 0
     for record_batch in projected_tfxio.RecordBatches(options):
         num_batches += 1
         self._ValidateRecordBatch(projected_tfxio, record_batch)
         # The projected batch must expose the requested column and nothing
         # else.
         self.assertIn(feature_name, record_batch.schema.names)
         self.assertLen(record_batch.schema.names, 1)
     # Guard against a vacuous pass: with no batches none of the assertions
     # above would run and the test would still succeed.
     self.assertGreater(num_batches, 0)
コード例 #3
0
def _input_fn(
    file_pattern: Text,
    data_accessor: DataAccessor,
    schema: schema_pb2.Schema,
    batch_size: int = 20,
) -> Tuple[np.ndarray, np.ndarray]:
    """Generates features and label for tuning/training.

    Reads one epoch of record batches through `data_accessor`, flattens every
    column of each batch, stacks the `_FEATURE_KEYS` columns along the last
    axis, and concatenates the per-batch results.

    Args:
      file_pattern: input tfrecord file pattern.
      data_accessor: DataAccessor for converting input to RecordBatch.
      schema: schema of the input data.
      batch_size: An int representing the number of records to combine in a
        single batch.

    Returns:
      A (features, indices) tuple where features is a matrix of features, and
      indices is a single vector of label indices.

    Raises:
      ValueError: if no record batches are produced for `file_pattern`.
    """
    record_batch_iterator = data_accessor.record_batch_factory(
        file_pattern,
        dataset_options.RecordBatchesOptions(batch_size=batch_size,
                                             num_epochs=1), schema)

    feature_list = []
    label_list = []
    for record_batch in record_batch_iterator:
        # Map each column name to its flattened values.
        # NOTE(review): assumes every column exposes `flatten()` (e.g. an
        # Arrow list array holding one value per row) -- confirm with the
        # schema in use.
        record_dict = {
            field.name: column.flatten()
            for column, field in zip(record_batch, record_batch.schema)
        }

        label_list.append(record_dict[_LABEL_KEY])
        features = [record_dict[key] for key in _FEATURE_KEYS]
        # Stack per-feature vectors so that each row holds one record.
        feature_list.append(np.stack(features, axis=-1))

    if not feature_list:
        # np.concatenate([]) would fail with an opaque "need at least one
        # array to concatenate"; raise the same exception type with a message
        # that points at the actual cause.
        raise ValueError(
            "No record batches were read for file pattern "
            f"{file_pattern!r}; cannot build features and labels.")

    return np.concatenate(feature_list), np.concatenate(label_list)
コード例 #4
0
 def testRecordBatches(self):
     """Validates every record batch produced by the TFXIO built from `_SCHEMA`.

     Reads a single unshuffled epoch with a batch size of `len(_EXAMPLES)` and
     hands each yielded batch to `self._ValidateRecordBatch`.
     """
     tfxio = self._MakeTFXIO(_SCHEMA)
     options = dataset_options.RecordBatchesOptions(
         batch_size=len(_EXAMPLES), shuffle=False, num_epochs=1)
     num_batches = 0
     for record_batch in tfxio.RecordBatches(options):
         num_batches += 1
         self._ValidateRecordBatch(tfxio, record_batch)
     # Guard against a vacuous pass: with no batches the validation would
     # never run and the test would still succeed.
     self.assertGreater(num_batches, 0)