コード例 #1
0
  def testFromCSVWithFeatureSpec(self):
    """Compare a feature-spec CSV read against the Pandas read of the same file.

    Loads the CSV produced by `_make_test_csv_sparse()` once with
    `pd.read_csv` and once with
    `TensorFlowDataFrame.from_csv_with_feature_spec`, densifies the two
    `VarLenFeature` columns, and hands both frames to
    `_assert_pandas_equals_tensorflow`. Returns immediately, asserting
    nothing, when `HAS_PANDAS` is false.
    """
    if not HAS_PANDAS:
      return
    num_batches = 100
    batch_size = 8

    data_path = _make_test_csv_sparse()
    feature_spec = {
        "int": tf.FixedLenFeature(None, dtypes.int16, np.nan),
        "float": tf.VarLenFeature(dtypes.float16),
        "bool": tf.VarLenFeature(dtypes.bool),
        "string": tf.FixedLenFeature(None, dtypes.string, "")
    }

    pandas_df = pd.read_csv(data_path, dtype={"string": object})
    # Pandas uses NaN for empty cells in a string column, while the feature
    # spec above defaults missing strings to "". fillna() matches missing
    # values directly (so nan != nan is not a problem), and assigning the
    # result back to the frame avoids writing through a column selection
    # that may be a copy rather than a view.
    pandas_df["string"] = pandas_df["string"].fillna("")
    tensorflow_df = df.TensorFlowDataFrame.from_csv_with_feature_spec(
        [data_path],
        batch_size=batch_size,
        shuffle=False,
        feature_spec=feature_spec)

    # These columns were sparse; re-densify them for comparison
    tensorflow_df["float"] = densify.Densify(np.nan)(tensorflow_df["float"])
    tensorflow_df["bool"] = densify.Densify(np.nan)(tensorflow_df["bool"])

    self._assert_pandas_equals_tensorflow(pandas_df,
                                          tensorflow_df,
                                          num_batches=num_batches,
                                          batch_size=batch_size)
コード例 #2
0
    def testFromCSVWithFeatureSpec(self):
        """Check a feature-spec CSV read against the Pandas read of the file.

        The CSV from `_make_test_csv_sparse()` is loaded with `pd.read_csv`
        and with `TensorFlowDataFrame.from_csv_with_feature_spec`; the
        `VarLenFeature` columns are densified with NaN as the fill value
        and both frames are passed to `_assert_pandas_equals_tensorflow`.
        Does nothing when `HAS_PANDAS` is false.
        """
        if not HAS_PANDAS:
            return

        batches_to_check = 100
        rows_per_batch = 8
        csv_path = _make_test_csv_sparse()

        spec = {
            "int": tf.FixedLenFeature(None, dtypes.int16, np.nan),
            "float": tf.VarLenFeature(dtypes.float16),
            "bool": tf.VarLenFeature(dtypes.bool)
        }

        expected_frame = pd.read_csv(csv_path)
        actual_frame = df.TensorFlowDataFrame.from_csv_with_feature_spec(
            [csv_path],
            batch_size=rows_per_batch,
            shuffle=False,
            feature_spec=spec)

        # The variable-length columns come back sparse; turn each one back
        # into a dense column (NaN-filled) so it can be compared directly.
        for sparse_column in ("float", "bool"):
            to_dense = densify.Densify(np.nan)
            actual_frame[sparse_column] = to_dense(actual_frame[sparse_column])

        self._assert_pandas_equals_tensorflow(
            expected_frame,
            actual_frame,
            num_batches=batches_to_check,
            batch_size=rows_per_batch)
コード例 #3
0
def _test_sparsify_densify(self, x, default_value):
    """Round-trip `x` through Sparsify and Densify and verify both stages.

    Args:
      x: NumPy array fed through `NumpySource` as a single batch of
        `len(x)` rows.
      default_value: value given to both `Sparsify` and `Densify`. Entries
        equal to it (or NaN entries, when it is NaN and `x` is not a
        string array) are expected to be absent from the sparse result.

    The sparse result is checked for its leading dimension, values and
    indices; the densified result is checked against `x` itself.
    """
    source_columns = in_memory_source.NumpySource(x, batch_size=len(x))()

    # Column 1 of the source is sparsified, then densified again.
    sparse_outputs = sparsify.Sparsify(default_value)(source_columns[1])
    [sparse_column] = sparse_outputs
    dense_outputs = densify.Densify(default_value)(sparse_column)
    [dense_column] = dense_outputs

    # Both columns are built against the same cache dict.
    build_cache = {}
    fetches = [
        sparse_column.build(build_cache),
        dense_column.build(build_cache),
    ]

    with self.test_session() as session:
        coordinator = tf.train.Coordinator()
        runner_threads = tf.train.start_queue_runners(
            sess=session, coord=coordinator)

        sparse_result, dense_result = session.run(fetches)

        coordinator.request_stop()
        coordinator.join(runner_threads)

    is_text = x.dtype.kind in ("S", "U")

    # Select the entries that should survive sparsification. NaN never
    # compares equal to itself, so a NaN default needs isnan() instead of !=.
    if not is_text and np.isnan(default_value):
        kept = ~np.isnan(x)
    else:
        kept = x != default_value
    kept_values = x[kept]
    kept_positions = np.arange(len(x))[kept].reshape(-1, 1)

    if is_text:
        # Python 2/3 compatibility: TensorFlow always returns bytes, so the
        # unicode expectations are converted to bytes before comparing.
        want_dense = [text.encode("utf-8") for text in x]
        want_sparse_values = [text.encode("utf-8") for text in kept_values]
    else:
        want_dense = x
        want_sparse_values = kept_values

    np.testing.assert_array_equal(len(x), sparse_result.shape[0])
    np.testing.assert_array_equal(want_sparse_values, sparse_result.values)
    np.testing.assert_array_equal(kept_positions, sparse_result.indices)
    np.testing.assert_array_equal(want_dense, dense_result)