Example #1
  def testIgnoreErrWithUnquotedQuotes(self):
    record_defaults = [['']] * 3
    inputs = [['1,2"3,4', 'a,b,c"d', '9,8"7,6,5', 'e,f,g']]
    filenames = self._setup_files(inputs)
    dataset = readers.CsvDataset(filenames, record_defaults=record_defaults)
    dataset = dataset.apply(error_ops.ignore_errors())
    self.assertDatasetProduces(dataset, [(b'e', b'f', b'g')])
Example #2
  def testReadFileIgnoreError(self):

    def write_string_to_file(value, filename):
      with open(filename, "w") as f:
        f.write(value)

    filenames = [
        os.path.join(self.get_temp_dir(), "file_%d.txt" % i) for i in range(5)
    ]
    for filename in filenames:
      write_string_to_file(filename, filename)

    dataset = (
        dataset_ops.Dataset.from_tensor_slices(filenames).map(
            io_ops.read_file,
            num_parallel_calls=2).prefetch(2).apply(error_ops.ignore_errors()))
    get_next = self.getNext(dataset)

    # All of the files are present.
    for filename in filenames:
      self.assertEqual(compat.as_bytes(filename), self.evaluate(get_next()))
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(get_next())

    # Delete one of the files.
    os.remove(filenames[0])

    # Attempting to read filenames[0] will fail, but ignore_errors()
    # will catch the error.
    get_next = self.getNext(dataset)
    for filename in filenames[1:]:
      self.assertEqual(compat.as_bytes(filename), self.evaluate(get_next()))
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(get_next())
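The same pattern works outside the test harness. A minimal standalone sketch with the public TF 2.x API (the filenames below are hypothetical):

```python
import tensorflow as tf

# Hypothetical paths; a file that disappears between listing and reading
# raises NotFoundError, which ignore_errors() swallows.
filenames = ["a.txt", "b.txt", "missing.txt"]
dataset = (
    tf.data.Dataset.from_tensor_slices(filenames)
    .map(tf.io.read_file, num_parallel_calls=2)
    .apply(tf.data.experimental.ignore_errors()))
for contents in dataset:
  print(contents.numpy())
```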
Example #3
  def testCsvDataset_ignoreErrWithUnquotedQuotes(self):
    record_defaults = [['']] * 3
    inputs = [['1,2"3,4', 'a,b,c"d', '9,8"7,6,5', 'e,f,g']]
    filenames = self._setup_files(inputs)
    dataset = readers.CsvDataset(filenames, record_defaults=record_defaults)
    dataset = dataset.apply(error_ops.ignore_errors())
    self._verify_output_or_err(dataset, [['e', 'f', 'g']])
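For reference, a standalone sketch of the same behavior with the public API; `rows.csv` is a hypothetical file whose malformed rows (unquoted quotes) should be skipped:

```python
import tensorflow as tf

# Three string columns, each defaulting to '' (mirrors record_defaults above).
defaults = [tf.constant([''])] * 3
dataset = tf.data.experimental.CsvDataset("rows.csv", record_defaults=defaults)
# Rows that fail CSV parsing are dropped instead of raising.
dataset = dataset.apply(tf.data.experimental.ignore_errors())
```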
Example #4
def ignore_errors():
  """Creates a `Dataset` from another `Dataset` and silently ignores any errors.

  Use this transformation to produce a dataset that contains the same elements
  as the input, but silently drops any elements that caused an error. For
  example:

  ```python
  dataset = tf.data.Dataset.from_tensor_slices([1., 2., 0., 4.])

  # Computing `tf.debugging.check_numerics(1. / 0.)` will raise an
  # InvalidArgumentError.
  dataset = dataset.map(lambda x: tf.debugging.check_numerics(1. / x, "error"))

  # Using `ignore_errors()` will drop the element that causes an error.
  dataset = dataset.apply(
      tf.data.experimental.ignore_errors())  # ==> { 1., 0.5, 0.25 }
  ```

  Returns:
    A `Dataset` transformation function, which can be passed to
    `tf.data.Dataset.apply`.
  """
  return error_ops.ignore_errors()
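A runnable version of the docstring example (TF 2.x eager mode assumed):

```python
import tensorflow as tf

dataset = tf.data.Dataset.from_tensor_slices([1., 2., 0., 4.])
dataset = dataset.map(lambda x: tf.debugging.check_numerics(1. / x, "error"))
dataset = dataset.apply(tf.data.experimental.ignore_errors())
print(list(dataset.as_numpy_iterator()))  # [1.0, 0.5, 0.25]
```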
Example #5
  def testCsvDataset_ignoreErrWithUnescapedQuotes(self):
    record_defaults = [['']] * 3
    inputs = [['1,"2"3",4', '1,"2"3",4",5,5', 'a,b,"c"d"', 'e,f,g']]
    filenames = self._setup_files(inputs)
    dataset = readers.CsvDataset(filenames, record_defaults=record_defaults)
    dataset = dataset.apply(error_ops.ignore_errors())
    self.assertDatasetProduces(dataset, [(b'e', b'f', b'g')])
Example #6
  def _build_ds(self):
    components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)

    dataset = dataset_ops.Dataset.from_tensor_slices(components)
    dataset = dataset.map(lambda x: array_ops.check_numerics(x, "message"))
    dataset = dataset.apply(error_ops.ignore_errors())
    options = options_lib.Options()
    options.experimental_external_state_policy = (
        options_lib.ExternalStatePolicy.IGNORE)
    return dataset.with_options(options)
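The snippet above uses internal modules (`options_lib`); a sketch of the public equivalent follows. Setting the external state policy to `IGNORE` lets the dataset be serialized, e.g. for iterator checkpointing, without capturing external state:

```python
import numpy as np
import tensorflow as tf

components = np.array([1., 2., 3., np.nan, 5.], dtype=np.float32)
dataset = tf.data.Dataset.from_tensor_slices(components)
dataset = dataset.map(lambda x: tf.debugging.check_numerics(x, "message"))
dataset = dataset.apply(tf.data.experimental.ignore_errors())

options = tf.data.Options()
options.experimental_external_state_policy = (
    tf.data.experimental.ExternalStatePolicy.IGNORE)
dataset = dataset.with_options(options)
```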
Example #7
  def testZipIgnoreError(self):
    a = dataset_ops.Dataset.from_tensor_slices([1., 2., 0., 4.])
    b = a.map(lambda x: array_ops.check_numerics(1. / x, "error"))

    dataset = dataset_ops.Dataset.zip((b, a)).apply(error_ops.ignore_errors())
    get_next = self.getNext(dataset)

    for x in [1., 2., 4.]:
      self.assertEqual((1. / x, x), self.evaluate(get_next()))
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(get_next())
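When an element of a zipped dataset raises, the whole tuple is dropped, not just the failing component. A public-API sketch of the same check:

```python
import tensorflow as tf

a = tf.data.Dataset.from_tensor_slices([1., 2., 0., 4.])
b = a.map(lambda x: tf.debugging.check_numerics(1. / x, "error"))
zipped = tf.data.Dataset.zip((b, a)).apply(tf.data.experimental.ignore_errors())
print(list(zipped.as_numpy_iterator()))  # [(1.0, 1.0), (0.5, 2.0), (0.25, 4.0)]
```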
Example #8
  def filename_to_dataset(filename):
    dataset = CsvDataset(
        filename,
        record_defaults=column_defaults,
        field_delim=field_delim,
        use_quote_delim=use_quote_delim,
        na_value=na_value,
        select_cols=select_columns,
        header=header,
        compression_type=compression_type)
    if ignore_errors:
      dataset = dataset.apply(error_ops.ignore_errors())
    return dataset
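This helper comes from the internals of `tf.data.experimental.make_csv_dataset`, which exposes the same behavior through its `ignore_errors` argument. A usage sketch (assumes a headered CSV file `data.csv`):

```python
import tensorflow as tf

dataset = tf.data.experimental.make_csv_dataset(
    "data.csv", batch_size=4, ignore_errors=True)
```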
Example #9
  def testParallelMapIgnoreError(self):
    components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)

    dataset = (
        dataset_ops.Dataset.from_tensor_slices(components).map(
            lambda x: array_ops.check_numerics(x, "message"),
            num_parallel_calls=2).prefetch(2).apply(error_ops.ignore_errors()))
    get_next = self.getNext(dataset)

    for x in [1., 2., 3., 5.]:
      self.assertEqual(x, self.evaluate(get_next()))
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(get_next())
Example #10
  def testIgnoreError_withLogWarning(self):
    components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
    dataset = (
        dataset_ops.Dataset.from_tensor_slices(components).map(
            lambda x: array_ops.check_numerics(x, "message")).apply(
                error_ops.ignore_errors(log_warning=True)))
    get_next = self.getNext(dataset)
    for x in [1., 2., 3.]:
      self.assertEqual(x, self.evaluate(get_next()))
    with self.captureWritesToStream(sys.stderr) as logged:
      self.assertEqual(5., self.evaluate(get_next()))
    expected = "Tensor had NaN values"
    self.assertIn(expected, logged.contents())
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(get_next())
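With `log_warning=True`, each dropped element also emits a warning to stderr (the "Tensor had NaN values" message asserted above). A sketch, assuming the flag is exposed through the public `tf.data.experimental.ignore_errors` as it is through `error_ops`:

```python
import numpy as np
import tensorflow as tf

components = np.array([1., 2., np.nan, 4.], dtype=np.float32)
dataset = (
    tf.data.Dataset.from_tensor_slices(components)
    .map(lambda x: tf.debugging.check_numerics(x, "nan check"))
    .apply(tf.data.experimental.ignore_errors(log_warning=True)))
print(list(dataset.as_numpy_iterator()))  # [1.0, 2.0, 4.0]; the NaN is logged
```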
Example #11
  def testParallelMapIgnoreError(self):
    components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)

    dataset = (
        dataset_ops.Dataset.from_tensor_slices(components).map(
            lambda x: array_ops.check_numerics(x, "message"),
            num_parallel_calls=2).prefetch(2).apply(error_ops.ignore_errors()))
    iterator = dataset.make_initializable_iterator()
    init_op = iterator.initializer
    get_next = iterator.get_next()

    with self.cached_session() as sess:
      self.evaluate(init_op)
      for x in [1., 2., 3., 5.]:
        self.assertEqual(x, self.evaluate(get_next))
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)
Example #12
  def testTFRecordDatasetIgnoreError(self):
    filenames = []
    for i in range(5):
      fn = os.path.join(self.get_temp_dir(), "tf_record.%d.txt" % i)
      filenames.append(fn)
      writer = python_io.TFRecordWriter(fn)
      for j in range(10):
        writer.write(b"record")
      writer.close()
      # Append corrupted data
      with open(fn, "a") as f:
        f.write("corrupted data")

    dataset = readers.TFRecordDataset(filenames).apply(
        error_ops.ignore_errors())
    get_next = self.getNext(dataset)

    # All of the files are present.
    for filename in filenames:
      for j in range(10):
        self.assertEqual(b"record", self.evaluate(get_next()))
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(get_next())
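A common production use of this pattern: reading TFRecord shards whose tails may be truncated or corrupted. A minimal public-API sketch (the shard paths are hypothetical):

```python
import tensorflow as tf

filenames = ["shard-0.tfrecord", "shard-1.tfrecord"]  # hypothetical shards
dataset = tf.data.TFRecordDataset(filenames).apply(
    tf.data.experimental.ignore_errors())
# Records before the corruption point are still produced; the DataLossError
# raised at a corrupted tail is swallowed.
for record in dataset:
  pass  # process each record
```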
Example #13
def ignore_errors():
  """Creates a `Dataset` from another `Dataset` and silently ignores any errors.

  Use this transformation to produce a dataset that contains the same elements
  as the input, but silently drops any elements that caused an error. For
  example:

  ```python
  dataset = tf.data.Dataset.from_tensor_slices([1., 2., 0., 4.])

  # Computing `tf.check_numerics(1. / 0.)` will raise an InvalidArgumentError.
  dataset = dataset.map(lambda x: tf.check_numerics(1. / x, "error"))

  # Using `ignore_errors()` will drop the element that causes an error.
  dataset = dataset.apply(
      tf.contrib.data.ignore_errors())  # ==> { 1., 0.5, 0.25 }
  ```

  Returns:
    A `Dataset` transformation function, which can be passed to
    `tf.data.Dataset.apply`.
  """
  return error_ops.ignore_errors()
Example #14
  def _build_ds(self):
    return dataset_ops.Dataset.range(5).map(
        array_ops.ones).map(lambda x: array_ops.gather(x, [0])).apply(
            error_ops.ignore_errors())
Example #15
  def testCardinality(self):
    ds = dataset_ops.Dataset.range(10).apply(error_ops.ignore_errors())
    self.assertEqual(self.evaluate(ds.cardinality()), dataset_ops.UNKNOWN)
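Because elements can be dropped at runtime, `ignore_errors` makes the dataset's cardinality statically unknowable; with the public API the sentinel is `tf.data.UNKNOWN_CARDINALITY`:

```python
import tensorflow as tf

ds = tf.data.Dataset.range(10).apply(tf.data.experimental.ignore_errors())
print(ds.cardinality() == tf.data.UNKNOWN_CARDINALITY)  # tf.Tensor(True, shape=(), dtype=bool)
```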
Example #16
  def _build_ds(self, components):
   return dataset_ops.Dataset.from_tensor_slices(components).map(
       lambda x: array_ops.check_numerics(x, "message")).apply(
           error_ops.ignore_errors())