def _test(self, args, expected_out=None, expected_err_re=None):
    if expected_err_re is None:
      decode = parsing_ops.decode_csv(**args)
      out = self.evaluate(decode)

      for i, field in enumerate(out):
        if field.dtype == np.float32 or field.dtype == np.float64:
          self.assertAllClose(field, expected_out[i])
        else:
          self.assertAllEqual(field, expected_out[i])
    else:
      with self.assertRaisesOpError(expected_err_re):
        decode = parsing_ops.decode_csv(**args)
        self.evaluate(decode)
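For orientation, here is a minimal sketch of the kind of args/expected_out pair this helper consumes; the record string and defaults are invented for illustration, not taken from the original test suite:

# Hypothetical invocation of the _test helper above: one float column and
# one string column parsed from a single made-up record.
args = {
    "records": ["0.5,hello"],
    "record_defaults": [
        constant_op.constant([], dtypes.float32),  # column 0: float32
        constant_op.constant([], dtypes.string),   # column 1: string
    ],
}
self._test(args, expected_out=[[0.5], [b"hello"]])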
Example #3
 def _apply_transform(self, input_tensors):
   # One scalar default per output column; the defaults determine each
   # column's dtype for decode_csv.
   default_consts = [constant_op.constant(d, shape=[1])
                     for d in self._default_values]
   parsed_values = parsing_ops.decode_csv(input_tensors[0],
                                          record_defaults=default_consts)
   # `return_type` is a namedtuple class over the output column names.
   # pylint: disable=not-callable
   return self.return_type(*parsed_values)
Example #4
 def testManagedMainErrorTwoQueues(self):
   # Tests that the supervisor correctly raises a main loop
   # error even when using multiple queues for input.
   logdir = self._test_dir("managed_main_error_two_queues")
   os.makedirs(logdir)
   data_path = self._csv_data(logdir)
   with self.assertRaisesRegexp(RuntimeError, "fail at step 3"):
     with ops.Graph().as_default():
       # Create an input pipeline that reads the file 3 times.
       filename_queue = input_lib.string_input_producer([data_path],
                                                        num_epochs=3)
       reader = io_ops.TextLineReader()
       _, csv = reader.read(filename_queue)
       rec = parsing_ops.decode_csv(csv,
                                    record_defaults=[[1], [1], [1]])
       shuff_rec = input_lib.shuffle_batch(rec, 1, 6, 4)
       sv = supervisor.Supervisor(logdir=logdir)
       with sv.managed_session("") as sess:
         for step in range(9):
           if sv.should_stop():
             break
           elif step == 3:
             raise RuntimeError("fail at step 3")
           else:
             sess.run(shuff_rec)
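The _csv_data helper is not shown in this excerpt; given the record_defaults of three integer columns above, a plausible stand-in might look like this (a sketch, not the original helper):

# Hypothetical stand-in for the _csv_data helper referenced above: writes
# a few three-integer CSV records and returns the file path.
def _csv_data(self, logdir):
  data_path = os.path.join(logdir, "data.csv")
  with open(data_path, "w") as f:
    for i in range(3):
      f.write("%d,%d,%d\n" % (i, i + 1, i + 2))
  return data_path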
Example #6
 def _make_test_datasets(self, inputs, **kwargs):
   # Test by comparing its output to what we could get with map->decode_csv
   filenames = self._setup_files(inputs)
   dataset_expected = core_readers.TextLineDataset(filenames)
   dataset_expected = dataset_expected.map(
       lambda l: parsing_ops.decode_csv(l, **kwargs))
   dataset_actual = readers.CsvDataset(filenames, **kwargs)
   return (dataset_actual, dataset_expected)
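A sketch of how the returned pair might be checked, assuming eager execution and invented inputs (each inner list of inputs is expected to become one file):

# Hypothetical element-wise comparison of the two datasets.
inputs = [["1,2,3", "4,5,6"]]
kwargs = {"record_defaults": [[0]] * 3}
actual, expected = self._make_test_datasets(inputs, **kwargs)
for a, e in zip(actual, expected):    # eager iteration over both datasets
  for a_col, e_col in zip(a, e):      # one tensor per CSV column
    assert a_col.numpy() == e_col.numpy()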
 def benchmarkMapWithStrings(self):
   self._setUp(self.STR_VAL)
   for i in range(len(self._filenames)):
     num_cols = self._num_cols[i]
     kwargs = {'record_defaults': [['']] * num_cols}
     dataset = core_readers.TextLineDataset(self._filenames[i]).repeat()
     dataset = dataset.map(lambda l: parsing_ops.decode_csv(l, **kwargs))  # pylint: disable=cell-var-from-loop
     self._runBenchmark(dataset, num_cols, 'csv_strings_map_decode_csv')
   self._tearDown()
 def decode_csv_fn(x):
   return parsing_ops.decode_csv(
       x,
       record_defaults=[
           constant_op.constant([], dtypes.float32),
           constant_op.constant([], dtypes.int32),
           constant_op.constant([], dtypes.string)
       ],
       field_delim=":")
 def benchmark_map_with_floats(self):
   self._set_up(self.FLOAT_VAL)
   for i in range(len(self._filenames)):
     num_cols = self._num_cols[i]
     kwargs = {'record_defaults': [[0.0]] * num_cols}
     dataset = core_readers.TextLineDataset(self._filenames[i]).repeat()
     dataset = dataset.map(
         lambda l: parsing_ops.decode_csv(l, **kwargs))  # pylint: disable=cell-var-from-loop
     self._run_benchmark(dataset, num_cols, 'csv_float_map_decode_csv')
   self._tear_down()
  def _test(self, args, expected_out=None, expected_err_re=None):
    with self.test_session() as sess:
      decode = parsing_ops.decode_csv(**args)

      if expected_err_re is None:
        out = sess.run(decode)

        for i, field in enumerate(out):
          if field.dtype == np.float32:
            self.assertAllClose(field, expected_out[i])
          else:
            self.assertAllEqual(field, expected_out[i])

      else:
        with self.assertRaisesOpError(expected_err_re):
          sess.run(decode)
Example #13
 def testManagedEndOfInputOneQueue(self):
   # Tests that the supervisor finishes without an error when using
   # a fixed number of epochs, reading from a single queue.
   logdir = self._test_dir("managed_end_of_input_one_queue")
   os.makedirs(logdir)
   data_path = self._csv_data(logdir)
   with ops.Graph().as_default():
     # Create an input pipeline that reads the file 3 times.
     filename_queue = input_lib.string_input_producer([data_path],
                                                      num_epochs=3)
     reader = io_ops.TextLineReader()
     _, csv = reader.read(filename_queue)
     rec = parsing_ops.decode_csv(csv, record_defaults=[[1], [1], [1]])
     sv = supervisor.Supervisor(logdir=logdir)
     with sv.managed_session("") as sess:
       while not sv.should_stop():
         sess.run(rec)
Example #14
  def decode_csv(line):
    """Decodes csv line into features.

    Args:
      line: String tensor corresponding to one csv record.
    Returns:
      A dictionary of feature names to values for that particular record. If
      label_key is provided, extracts the label feature to be returned as the
      second element of the tuple.
    """
    columns = parsing_ops.decode_csv(
        line,
        column_defaults,
        field_delim=field_delim,
        use_quote_delim=use_quote_delim)
    features = dict(zip(column_keys, columns))
    if label_key is not None:
      label = features.pop(label_key)
      return features, label
    return features
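A minimal sketch of how this closure might be configured; the column names, defaults, and record below are invented for illustration:

# Hypothetical configuration for the decode_csv closure above.
column_keys = ["age", "name"]
column_defaults = [constant_op.constant([], dtypes.int32),
                   constant_op.constant([], dtypes.string)]
field_delim = ","
use_quote_delim = True
label_key = "age"

features, label = decode_csv(constant_op.constant("42,alice"))
# features == {"name": b"alice"}, label == 42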
Example #15
 def _process_records(self, lines):
   """Parse `lines` as CSV records."""
   if self._column_dtypes is None:
     # No explicit dtypes: the `times` column defaults to int64; every
     # other column gets an empty default, which marks it as required.
     default_values = [(array_ops.zeros([], dtypes.int64),)
                       if column_name == feature_keys.TrainEvalFeatures.TIMES
                       else () for column_name in self._column_names]
   else:
     default_values = [(array_ops.zeros([], dtype),)
                       for dtype in self._column_dtypes]
   columns = parsing_ops.decode_csv(lines, default_values)
   features_lists = {}
   for column_name, value in zip(self._column_names, columns):
     features_lists.setdefault(column_name, []).append(value)
   features = {}
   for column_name, values in features_lists.items():
     if column_name == feature_keys.TrainEvalFeatures.TIMES:
       features[column_name] = values[0]
     else:
       features[column_name] = array_ops.stack(values, axis=1)
   return features
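A worked trace of the grouping above, with invented column names (assuming feature_keys.TrainEvalFeatures.TIMES equals "times"):

# Hypothetical trace of _process_records:
#   self._column_names = ["times", "value", "value"]
#   lines = ["0,1.0,2.0", "1,3.0,4.0"]
# decode_csv returns three tensors of shape [2]; grouping by column name
# gives features_lists == {"times": [t], "value": [v1, v2]}, so
#   features["times"] has shape [2] and
#   features["value"] == array_ops.stack([v1, v2], axis=1), shape [2, 2].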
 def loop_fn(i):
   line = array_ops.gather(csv_tensor, i)
   return parsing_ops.decode_csv(line, **kwargs)
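This fragment appears to be the body of a parallel-for test; a sketch of how it might be driven, with csv_tensor and kwargs invented here:

# Hypothetical driver for loop_fn above: vectorize decode_csv over a batch
# of records with pfor.
from tensorflow.python.ops.parallel_for import control_flow_ops as pfor_ops

csv_tensor = constant_op.constant(["1,a", "2,b"])
kwargs = {"record_defaults": [[0], [""]]}
outputs = pfor_ops.pfor(loop_fn, 2)  # 2 == number of records
# outputs is a list of stacked tensors, one per CSV column.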