def _test(self, args, expected_out=None, expected_err_re=None):
  if expected_err_re is None:
    decode = parsing_ops.decode_csv(**args)
    out = self.evaluate(decode)
    for i, field in enumerate(out):
      if field.dtype == np.float32 or field.dtype == np.float64:
        self.assertAllClose(field, expected_out[i])
      else:
        self.assertAllEqual(field, expected_out[i])
  else:
    with self.assertRaisesOpError(expected_err_re):
      decode = parsing_ops.decode_csv(**args)
      self.evaluate(decode)
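# A hypothetical test case built on the helper above, illustrating the
# shape of `args`: its keys mirror the keyword arguments of
# parsing_ops.decode_csv, and an empty typed array in record_defaults
# pins a column's dtype while making the column required. The records
# and expected values here are illustrative, not from the original
# suite.
def testSimpleInt64(self):
  args = {
      "records": ["1", "2", "3"],
      "record_defaults": [np.array([], dtype=np.int64)],
  }
  expected_out = [[1, 2, 3]]
  self._test(args, expected_out)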
def _apply_transform(self, input_tensors):
  default_consts = [constant_op.constant(d, shape=[1])
                    for d in self._default_values]
  parsed_values = parsing_ops.decode_csv(
      input_tensors[0], record_defaults=default_consts)
  # pylint: disable=not-callable
  return self.return_type(*parsed_values)
def testManagedMainErrorTwoQueues(self):
  # Tests that the supervisor correctly raises a main loop
  # error even when using multiple queues for input.
  logdir = self._test_dir("managed_main_error_two_queues")
  os.makedirs(logdir)
  data_path = self._csv_data(logdir)
  with self.assertRaisesRegexp(RuntimeError, "fail at step 3"):
    with ops.Graph().as_default():
      # Create an input pipeline that reads the file 3 times.
      filename_queue = input_lib.string_input_producer(
          [data_path], num_epochs=3)
      reader = io_ops.TextLineReader()
      _, csv = reader.read(filename_queue)
      rec = parsing_ops.decode_csv(csv, record_defaults=[[1], [1], [1]])
      shuff_rec = input_lib.shuffle_batch(rec, 1, 6, 4)
      sv = supervisor.Supervisor(logdir=logdir)
      with sv.managed_session("") as sess:
        for step in range(9):
          if sv.should_stop():
            break
          elif step == 3:
            raise RuntimeError("fail at step 3")
          else:
            sess.run(shuff_rec)
def _make_test_datasets(self, inputs, **kwargs):
  # Test by comparing its output to what we could get with map->decode_csv
  filenames = self._setup_files(inputs)
  dataset_expected = core_readers.TextLineDataset(filenames)
  dataset_expected = dataset_expected.map(
      lambda l: parsing_ops.decode_csv(l, **kwargs))
  dataset_actual = readers.CsvDataset(filenames, **kwargs)
  return (dataset_actual, dataset_expected)
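# A hypothetical companion check inside the same test class: both
# pipelines should yield identical elements for the same records and
# defaults. The file contents are illustrative, and assertDatasetsEqual
# is assumed to come from the tf.data DatasetTestBase.
def testSimpleComparison(self):
  dataset_actual, dataset_expected = self._make_test_datasets(
      [["1,2,3", "4,5,6"]], record_defaults=[[0], [0], [0]])
  self.assertDatasetsEqual(dataset_actual, dataset_expected)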
def benchmarkMapWithStrings(self):
  self._setUp(self.STR_VAL)
  for i in range(len(self._filenames)):
    num_cols = self._num_cols[i]
    kwargs = {'record_defaults': [['']] * num_cols}
    dataset = core_readers.TextLineDataset(self._filenames[i]).repeat()
    dataset = dataset.map(lambda l: parsing_ops.decode_csv(l, **kwargs))  # pylint: disable=cell-var-from-loop
    self._runBenchmark(dataset, num_cols, 'csv_strings_map_decode_csv')
  self._tearDown()
def decode_csv_fn(x):
  return parsing_ops.decode_csv(
      x,
      record_defaults=[
          constant_op.constant([], dtypes.float32),
          constant_op.constant([], dtypes.int32),
          constant_op.constant([], dtypes.string)
      ],
      field_delim=":")
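# A minimal, hypothetical invocation of decode_csv_fn above: each empty
# constant in record_defaults fixes the column dtype (float32, int32,
# string) while providing no default value, so all three ":"-separated
# fields are required. The record below is illustrative.
decoded = decode_csv_fn(constant_op.constant("1.5:7:hello"))
# decoded is a list of three scalar tensors: 1.5, 7, and b"hello".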
def benchmark_map_with_floats(self):
  self._set_up(self.FLOAT_VAL)
  for i in range(len(self._filenames)):
    num_cols = self._num_cols[i]
    kwargs = {'record_defaults': [[0.0]] * num_cols}
    dataset = core_readers.TextLineDataset(self._filenames[i]).repeat()
    dataset = dataset.map(
        lambda l: parsing_ops.decode_csv(l, **kwargs))  # pylint: disable=cell-var-from-loop
    self._run_benchmark(dataset, num_cols, 'csv_float_map_decode_csv')
  self._tear_down()
def _test(self, args, expected_out=None, expected_err_re=None):
  with self.test_session() as sess:
    decode = parsing_ops.decode_csv(**args)
    if expected_err_re is None:
      out = sess.run(decode)
      for i, field in enumerate(out):
        if field.dtype == np.float32:
          self.assertAllClose(field, expected_out[i])
        else:
          self.assertAllEqual(field, expected_out[i])
    else:
      with self.assertRaisesOpError(expected_err_re):
        sess.run(decode)
def testManagedEndOfInputOneQueue(self):
  # Tests that the supervisor finishes without an error when using
  # a fixed number of epochs, reading from a single queue.
  logdir = self._test_dir("managed_end_of_input_one_queue")
  os.makedirs(logdir)
  data_path = self._csv_data(logdir)
  with ops.Graph().as_default():
    # Create an input pipeline that reads the file 3 times.
    filename_queue = input_lib.string_input_producer(
        [data_path], num_epochs=3)
    reader = io_ops.TextLineReader()
    _, csv = reader.read(filename_queue)
    rec = parsing_ops.decode_csv(csv, record_defaults=[[1], [1], [1]])
    sv = supervisor.Supervisor(logdir=logdir)
    with sv.managed_session("") as sess:
      while not sv.should_stop():
        sess.run(rec)
def decode_csv(line):
  """Decodes csv line into features.

  Args:
    line: String tensor corresponding to one csv record.

  Returns:
    A dictionary of feature names to values for that particular record. If
    label_key is provided, extracts the label feature to be returned as the
    second element of the tuple.
  """
  columns = parsing_ops.decode_csv(
      line,
      column_defaults,
      field_delim=field_delim,
      use_quote_delim=use_quote_delim)
  features = dict(zip(column_keys, columns))
  if label_key is not None:
    label = features.pop(label_key)
    return features, label
  return features
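# A hedged usage sketch for the closure above. In the original, the
# free variables come from the enclosing reader function's arguments;
# the stand-in values below, the filename, and the column layout are
# purely hypothetical.
column_keys = ["age", "income", "city"]
column_defaults = [[0], [0.0], [""]]
field_delim = ","
use_quote_delim = True
label_key = "income"
dataset = core_readers.TextLineDataset(["data.csv"]).map(decode_csv)
# Each element is now a (features, label) pair; for the line
# "32,57000.0,Oslo" that would be ({"age": 32, "city": b"Oslo"}, 57000.0).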
def _process_records(self, lines):
  """Parse `lines` as CSV records."""
  if self._column_dtypes is None:
    default_values = [(array_ops.zeros([], dtypes.int64),)
                      if column_name == feature_keys.TrainEvalFeatures.TIMES
                      else () for column_name in self._column_names]
  else:
    default_values = [(array_ops.zeros([], dtype),)
                      for dtype in self._column_dtypes]
  columns = parsing_ops.decode_csv(lines, default_values)
  features_lists = {}
  for column_name, value in zip(self._column_names, columns):
    features_lists.setdefault(column_name, []).append(value)
  features = {}
  for column_name, values in features_lists.items():
    if column_name == feature_keys.TrainEvalFeatures.TIMES:
      features[column_name] = values[0]
    else:
      features[column_name] = array_ops.stack(values, axis=1)
  return features
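# Illustrative note on the stacking above (column names hypothetical):
# with self._column_names = ["times", "values", "values"], each parsed
# batch of lines yields features["times"] with shape [batch] (taken as
# values[0]) and features["values"] with shape [batch, 2], because
# columns sharing a name are collected and stacked along axis 1.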
def loop_fn(i):
  line = array_ops.gather(csv_tensor, i)
  return parsing_ops.decode_csv(line, **kwargs)
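# A hedged sketch of how a loop_fn like the one above is typically
# consumed: the parallel_for utilities invoke it once per index i and
# vectorize the body, so decode_csv runs across the rows of csv_tensor.
# The import alias and the iteration count are assumptions for
# illustration.
from tensorflow.python.ops.parallel_for import control_flow_ops as pfor_control_flow_ops

decoded_rows = pfor_control_flow_ops.pfor(loop_fn, iters=8)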