예제 #1
0
  def testPandasInputFn(self):
    """Checks pandas_input_fn batches and its ValueError on a mismatched y.

    Builds a 4-row frame indexed 100..103, asserts that constructing or
    calling the input function with a target Series that has pandas' default
    index raises ValueError, then reads the batches produced for a target
    sharing the frame's index.
    """
    if not HAS_PANDAS:
      return
    index = np.arange(100, 104)
    a = np.arange(4)
    b = np.arange(32, 36)
    x = pd.DataFrame({'a': a, 'b': b}, index=index)
    y_noindex = pd.Series(np.arange(-32, -28))
    y = pd.Series(np.arange(-32, -28), index=index)
    with self.test_session() as session:
      with self.assertRaises(ValueError):
        failing_input_fn = pandas_io.pandas_input_fn(
            x, y_noindex, batch_size=2, shuffle=False, num_epochs=1)
        failing_input_fn()
      input_fn = pandas_io.pandas_input_fn(
          x, y, batch_size=2, shuffle=False, num_epochs=1)
      features, target = input_fn()

      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(session, coord=coord)
      try:
        res = session.run([features, target])
        self.assertAllEqual(res[0]['index'], [100, 101])
        self.assertAllEqual(res[0]['a'], [0, 1])
        self.assertAllEqual(res[0]['b'], [32, 33])
        self.assertAllEqual(res[1], [-32, -31])

        # Second batch consumes the remaining two rows of the single epoch,
        # so the run after it must report exhaustion.
        session.run([features, target])
        with self.assertRaises(errors.OutOfRangeError):
          session.run([features, target])
      finally:
        # Stop the queue-runner threads even if an assertion above fails.
        coord.request_stop()
        coord.join(threads)
예제 #2
0
 def testPandasInputFn_IndexMismatch(self):
   """Expects ValueError when the target Series has pandas' default index.

   The feature frame comes from makeTestDataFrame (defined outside this
   snippet); the target is built without an explicit index.
   """
   if not HAS_PANDAS:
     return
   frame, _ = self.makeTestDataFrame()
   default_indexed_target = pd.Series(np.arange(-32, -28))
   input_fn_kwargs = dict(batch_size=2, shuffle=False, num_epochs=1)
   with self.assertRaises(ValueError):
     pandas_io.pandas_input_fn(
         frame, default_indexed_target, **input_fn_kwargs)
예제 #3
0
 def testPandasInputFn_Idempotent(self):
   """Builds and invokes the input function repeatedly on the same data.

   Two invocations run without shuffling, then two with shuffling; the test
   passes as long as none of them raises.
   """
   if not HAS_PANDAS:
     return
   frame, labels = self.makeTestDataFrame()
   for shuffle_flag in (False, True):
     for _ in range(2):
       build_inputs = pandas_io.pandas_input_fn(
           frame, labels, batch_size=2, shuffle=shuffle_flag, num_epochs=1)
       build_inputs()
예제 #4
0
 def testPandasInputFn_IndexMismatch(self):
     """Expects ValueError when the target Series has pandas' default index.

     The feature frame comes from makeTestDataFrame (defined outside this
     snippet); the target is built without an explicit index.
     """
     if not HAS_PANDAS:
         return
     frame, _ = self.makeTestDataFrame()
     default_indexed_target = pd.Series(np.arange(-32, -28))
     input_fn_kwargs = dict(batch_size=2, shuffle=False, num_epochs=1)
     with self.assertRaises(ValueError):
         pandas_io.pandas_input_fn(
             frame, default_indexed_target, **input_fn_kwargs)
예제 #5
0
    def testPandasInputFn_ProducesOutputsForLargeBatchAndMultipleEpochs(self):
        """Checks a batch larger than the data spanning two epochs.

        With 2 rows, batch_size=128 and num_epochs=2, the first run is
        expected to return both epochs back to back (4 rows) and the next
        run is expected to raise OutOfRangeError.
        """
        if not HAS_PANDAS:
            return
        with self.test_session() as session:
            index = np.arange(100, 102)
            a = np.arange(2)
            b = np.arange(32, 34)
            x = pd.DataFrame({'a': a, 'b': b}, index=index)
            y = pd.Series(np.arange(-32, -30), index=index)
            input_fn = pandas_io.pandas_input_fn(x,
                                                 y,
                                                 batch_size=128,
                                                 shuffle=False,
                                                 num_epochs=2)

            results = input_fn()

            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(session,
                                                            coord=coord)
            try:
                features, target = session.run(results)
                self.assertAllEqual(features['a'], [0, 1, 0, 1])
                self.assertAllEqual(features['b'], [32, 33, 32, 33])
                self.assertAllEqual(target, [-32, -31, -32, -31])

                with self.assertRaises(errors.OutOfRangeError):
                    session.run(results)
            finally:
                # Stop the queue-runner threads even if an assertion fails.
                coord.request_stop()
                coord.join(threads)
예제 #6
0
 def testPandasInputFn_Idempotent(self):
     """Builds and invokes the input function repeatedly on the same data.

     Two invocations run without shuffling, then two with shuffling; the
     test passes as long as none of them raises.
     """
     if not HAS_PANDAS:
         return
     frame, labels = self.makeTestDataFrame()
     for shuffle_flag in (False, True):
         for _ in range(2):
             build_inputs = pandas_io.pandas_input_fn(
                 frame,
                 labels,
                 batch_size=2,
                 shuffle=shuffle_flag,
                 num_epochs=1)
             build_inputs()
예제 #7
0
  def testPandasInputFn_ProducesOutputsForLargeBatchAndMultipleEpochs(self):
    """Checks a batch larger than the data spanning two epochs.

    With 2 rows, batch_size=128 and num_epochs=2, the first run is expected
    to return both epochs back to back (4 rows) and the next run is expected
    to raise OutOfRangeError.
    """
    if not HAS_PANDAS:
      return
    with self.test_session() as session:
      index = np.arange(100, 102)
      a = np.arange(2)
      b = np.arange(32, 34)
      x = pd.DataFrame({'a': a, 'b': b}, index=index)
      y = pd.Series(np.arange(-32, -30), index=index)
      input_fn = pandas_io.pandas_input_fn(
          x, y, batch_size=128, shuffle=False, num_epochs=2)

      results = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)
      try:
        features, target = session.run(results)
        self.assertAllEqual(features['a'], [0, 1, 0, 1])
        self.assertAllEqual(features['b'], [32, 33, 32, 33])
        self.assertAllEqual(target, [-32, -31, -32, -31])

        with self.assertRaises(errors.OutOfRangeError):
          session.run(results)
      finally:
        # Stop the queue-runner threads even if an assertion above fails.
        coord.request_stop()
        coord.join(threads)
예제 #8
0
  def testPandasInputFn_RespectsEpoch_NoShuffle(self):
    """Unshuffled, one epoch, batch_size=4: inputs callable once.

    Delegates the check to assertInputsCallableNTimes (defined outside this
    snippet) with an expected count of 1.
    """
    if not HAS_PANDAS:
      return
    with self.test_session() as sess:
      frame, labels = self.makeTestDataFrame()
      options = dict(batch_size=4, shuffle=False, num_epochs=1)
      make_inputs = pandas_io.pandas_input_fn(frame, labels, **options)

      self.assertInputsCallableNTimes(make_inputs, sess, 1)
예제 #9
0
  def testPandasInputFn_RespectsEpoch_WithShuffleAutosize(self):
    """Shuffled, queue_capacity=None, two epochs: inputs callable 4 times.

    Delegates the check to assertInputsCallableNTimes (defined outside this
    snippet) with an expected count of 4.
    """
    if not HAS_PANDAS:
      return
    with self.test_session() as sess:
      frame, labels = self.makeTestDataFrame()
      options = dict(
          batch_size=2, shuffle=True, queue_capacity=None, num_epochs=2)
      make_inputs = pandas_io.pandas_input_fn(frame, labels, **options)

      self.assertInputsCallableNTimes(make_inputs, sess, 4)
예제 #10
0
  def testPandasInputFn_RespectsEpochUnevenBatches(self):
    """Unshuffled, one epoch, batch_size=3: inputs callable twice.

    Delegates the check to assertInputsCallableNTimes (defined outside this
    snippet) with an expected count of 2.
    """
    if not HAS_PANDAS:
      return
    frame, labels = self.makeTestDataFrame()
    with self.test_session() as sess:
      options = dict(batch_size=3, shuffle=False, num_epochs=1)
      make_inputs = pandas_io.pandas_input_fn(frame, labels, **options)

      # Going into the final batch, a single element of the epoch is left.
      self.assertInputsCallableNTimes(make_inputs, sess, 2)
예제 #11
0
  def testPandasInputFn_ExcludesIndex(self):
    """Asserts the features from one input_fn call have no 'index' entry.

    The features are obtained through callInputFnOnce, a helper defined
    outside this snippet.
    """
    if not HAS_PANDAS:
      return
    with self.test_session() as session:
      x, y = self.makeTestDataFrame()
      input_fn = pandas_io.pandas_input_fn(
          x, y, batch_size=2, shuffle=False, num_epochs=1)

      features, _ = self.callInputFnOnce(input_fn, session)

      # assertNotIn reports the offending container on failure, unlike
      # assertFalse on a precomputed boolean.
      self.assertNotIn('index', features)
예제 #12
0
  def testPandasInputFn_OnlyX(self):
    """Checks the first batch of features when y=None is passed.

    callInputFnOnce (defined outside this snippet) supplies the evaluated
    features; columns 'a' and 'b' are compared against their first two rows.
    """
    if not HAS_PANDAS:
      return
    with self.test_session() as sess:
      frame, _ = self.makeTestDataFrame()
      options = dict(y=None, batch_size=2, shuffle=False, num_epochs=1)
      make_inputs = pandas_io.pandas_input_fn(frame, **options)

      batch = self.callInputFnOnce(make_inputs, sess)

      self.assertAllEqual(batch['a'], [0, 1])
      self.assertAllEqual(batch['b'], [32, 33])
예제 #13
0
    def testPandasInputFn_RespectsEpoch_WithShuffle(self):
        """Shuffled, one epoch, batch_size=4: inputs callable once.

        Delegates the check to assertInputsCallableNTimes (defined outside
        this snippet) with an expected count of 1.
        """
        if not HAS_PANDAS:
            return
        with self.test_session() as sess:
            frame, labels = self.makeTestDataFrame()
            options = dict(batch_size=4, shuffle=True, num_epochs=1)
            make_inputs = pandas_io.pandas_input_fn(frame, labels, **options)

            self.assertInputsCallableNTimes(make_inputs, sess, 1)
    def testPandasInputFn(self):
        """Checks pandas_input_fn batches and its ValueError on a mismatched y.

        Builds a 4-row frame indexed 100..103, asserts that constructing or
        calling the input function with a target Series that has pandas'
        default index raises ValueError, then reads the batches produced for
        a target sharing the frame's index.
        """
        if not HAS_PANDAS:
            return
        index = np.arange(100, 104)
        a = np.arange(4)
        b = np.arange(32, 36)
        x = pd.DataFrame({'a': a, 'b': b}, index=index)
        y_noindex = pd.Series(np.arange(-32, -28))
        y = pd.Series(np.arange(-32, -28), index=index)
        with self.test_session() as session:
            with self.assertRaises(ValueError):
                failing_input_fn = pandas_io.pandas_input_fn(x,
                                                             y_noindex,
                                                             batch_size=2,
                                                             shuffle=False,
                                                             num_epochs=1)
                failing_input_fn()
            input_fn = pandas_io.pandas_input_fn(x,
                                                 y,
                                                 batch_size=2,
                                                 shuffle=False,
                                                 num_epochs=1)
            features, target = input_fn()

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(session, coord=coord)
            try:
                res = session.run([features, target])
                self.assertAllEqual(res[0]['index'], [100, 101])
                self.assertAllEqual(res[0]['a'], [0, 1])
                self.assertAllEqual(res[0]['b'], [32, 33])
                self.assertAllEqual(res[1], [-32, -31])

                # Second batch consumes the remaining two rows of the single
                # epoch, so the run after it must report exhaustion.
                session.run([features, target])
                with self.assertRaises(errors.OutOfRangeError):
                    session.run([features, target])
            finally:
                # Stop the queue-runner threads even if an assertion fails.
                coord.request_stop()
                coord.join(threads)
예제 #15
0
    def testPandasInputFn_RespectsEpochUnevenBatches(self):
        """Unshuffled, one epoch, batch_size=3: inputs callable twice.

        Delegates the check to assertInputsCallableNTimes (defined outside
        this snippet) with an expected count of 2.
        """
        if not HAS_PANDAS:
            return
        frame, labels = self.makeTestDataFrame()
        with self.test_session() as sess:
            options = dict(batch_size=3, shuffle=False, num_epochs=1)
            make_inputs = pandas_io.pandas_input_fn(frame, labels, **options)

            # Going into the final batch, a single element of the epoch is
            # left.
            self.assertInputsCallableNTimes(make_inputs, sess, 2)
예제 #16
0
    def testPandasInputFn_RespectsEpoch_WithShuffleAutosize(self):
        """Shuffled, queue_capacity=None, two epochs: inputs callable 4 times.

        Delegates the check to assertInputsCallableNTimes (defined outside
        this snippet) with an expected count of 4.
        """
        if not HAS_PANDAS:
            return
        with self.cached_session() as sess:
            frame, labels = self.makeTestDataFrame()
            options = dict(batch_size=2,
                           shuffle=True,
                           queue_capacity=None,
                           num_epochs=2)
            make_inputs = pandas_io.pandas_input_fn(frame, labels, **options)

            self.assertInputsCallableNTimes(make_inputs, sess, 4)
예제 #17
0
  def testPandasInputFn_ProducesExpectedOutputs(self):
    """Checks the first unshuffled batch of features and target.

    callInputFnOnce (defined outside this snippet) supplies the evaluated
    pair; with batch_size=2 the first two rows of 'a', 'b' and the target
    are compared against fixed values.
    """
    if not HAS_PANDAS:
      return
    with self.test_session() as sess:
      frame, labels = self.makeTestDataFrame()
      options = dict(batch_size=2, shuffle=False, num_epochs=1)
      make_inputs = pandas_io.pandas_input_fn(frame, labels, **options)

      batch_features, batch_target = self.callInputFnOnce(make_inputs, sess)

      self.assertAllEqual(batch_features['a'], [0, 1])
      self.assertAllEqual(batch_features['b'], [32, 33])
      self.assertAllEqual(batch_target, [-32, -31])
예제 #18
0
    def testPandasInputFn_ExcludesIndex(self):
        """Asserts the features from one input_fn call have no 'index' entry.

        The features are obtained through callInputFnOnce, a helper defined
        outside this snippet.
        """
        if not HAS_PANDAS:
            return
        with self.test_session() as session:
            x, y = self.makeTestDataFrame()
            input_fn = pandas_io.pandas_input_fn(x,
                                                 y,
                                                 batch_size=2,
                                                 shuffle=False,
                                                 num_epochs=1)

            features, _ = self.callInputFnOnce(input_fn, session)

            # assertNotIn reports the offending container on failure, unlike
            # assertFalse on a precomputed boolean.
            self.assertNotIn('index', features)
예제 #19
0
    def testPandasInputFn_OnlyX(self):
        """Checks the first batch of features when y=None is passed.

        callInputFnOnce (defined outside this snippet) supplies the
        evaluated features; columns 'a' and 'b' are compared against their
        first two rows.
        """
        if not HAS_PANDAS:
            return
        with self.test_session() as sess:
            frame, _ = self.makeTestDataFrame()
            options = dict(y=None, batch_size=2, shuffle=False, num_epochs=1)
            make_inputs = pandas_io.pandas_input_fn(frame, **options)

            batch = self.callInputFnOnce(make_inputs, sess)

            self.assertAllEqual(batch['a'], [0, 1])
            self.assertAllEqual(batch['b'], [32, 33])
예제 #20
0
    def testPandasInputFn_ProducesExpectedOutputs(self):
        """Checks the first unshuffled batch of features and target.

        callInputFnOnce (defined outside this snippet) supplies the
        evaluated pair; with batch_size=2 the first two rows of 'a', 'b' and
        the target are compared against fixed values.
        """
        if not HAS_PANDAS:
            return
        with self.test_session() as sess:
            frame, labels = self.makeTestDataFrame()
            options = dict(batch_size=2, shuffle=False, num_epochs=1)
            make_inputs = pandas_io.pandas_input_fn(frame, labels, **options)

            batch_features, batch_target = self.callInputFnOnce(
                make_inputs, sess)

            self.assertAllEqual(batch_features['a'], [0, 1])
            self.assertAllEqual(batch_features['b'], [32, 33])
            self.assertAllEqual(batch_target, [-32, -31])
예제 #21
0
    def testPandasInputFn_ProducesOutputsWhenDataSizeNotDividedByBatchSize(
            self):
        """Checks batching when the row count is not a multiple of batch_size.

        Five rows with batch_size=2 over one epoch are expected to yield
        batches of 2, 2 and 1 rows in order, after which the next run is
        expected to raise OutOfRangeError.
        """
        if not HAS_PANDAS:
            return
        with self.test_session() as session:
            index = np.arange(100, 105)
            a = np.arange(5)
            b = np.arange(32, 37)
            x = pd.DataFrame({'a': a, 'b': b}, index=index)
            y = pd.Series(np.arange(-32, -27), index=index)

            input_fn = pandas_io.pandas_input_fn(x,
                                                 y,
                                                 batch_size=2,
                                                 shuffle=False,
                                                 num_epochs=1)

            results = input_fn()

            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(session,
                                                            coord=coord)

            # (expected 'a', expected 'b', expected target) per batch.
            expected_batches = (
                ([0, 1], [32, 33], [-32, -31]),
                ([2, 3], [34, 35], [-30, -29]),
                ([4], [36], [-28]),
            )
            try:
                for expected_a, expected_b, expected_target in expected_batches:
                    features, target = session.run(results)
                    self.assertAllEqual(features['a'], expected_a)
                    self.assertAllEqual(features['b'], expected_b)
                    self.assertAllEqual(target, expected_target)

                with self.assertRaises(errors.OutOfRangeError):
                    session.run(results)
            finally:
                # Stop the queue-runner threads even if an assertion fails.
                coord.request_stop()
                coord.join(threads)
예제 #22
0
  def testPandasInputFn_ProducesOutputsWhenDataSizeNotDividedByBatchSize(self):
    """Checks batching when the row count is not a multiple of batch_size.

    Five rows with batch_size=2 over one epoch are expected to yield batches
    of 2, 2 and 1 rows in order, after which the next run is expected to
    raise OutOfRangeError.
    """
    if not HAS_PANDAS:
      return
    with self.test_session() as session:
      index = np.arange(100, 105)
      a = np.arange(5)
      b = np.arange(32, 37)
      x = pd.DataFrame({'a': a, 'b': b}, index=index)
      y = pd.Series(np.arange(-32, -27), index=index)

      input_fn = pandas_io.pandas_input_fn(
          x, y, batch_size=2, shuffle=False, num_epochs=1)

      results = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      # (expected 'a', expected 'b', expected target) per batch, in order.
      expected_batches = (
          ([0, 1], [32, 33], [-32, -31]),
          ([2, 3], [34, 35], [-30, -29]),
          ([4], [36], [-28]),
      )
      try:
        for expected_a, expected_b, expected_target in expected_batches:
          features, target = session.run(results)
          self.assertAllEqual(features['a'], expected_a)
          self.assertAllEqual(features['b'], expected_b)
          self.assertAllEqual(target, expected_target)

        with self.assertRaises(errors.OutOfRangeError):
          session.run(results)
      finally:
        # Stop the queue-runner threads even if an assertion above fails.
        coord.request_stop()
        coord.join(threads)
# Work on copies so the label-encoding below does not mutate the caller's
# frames.
X_train = X_train.copy()
X_test = X_test.copy()

# Replace every categorical column with an integer-coded '<name>_ids' column.
# Each encoder is fit on X[var] (X is defined outside this snippet --
# presumably the full dataset, so train and test share one mapping) and kept
# in categorical_var_encoders, keyed by the original column name.
categorical_var_encoders = {}
for var in categorical_vars:
    print(var)
    le = LabelEncoder().fit(X[var])
    X_train[var + '_ids'] = le.transform(X_train[var])
    X_test[var + '_ids'] = le.transform(X_test[var])
    X_train.pop(var)
    X_test.pop(var)
    categorical_var_encoders[var] = le


# Hand-built graph: placeholders for features and targets, a network with
# the given layer sizes, and a single run of the training op.
input_X = tf.placeholder(dtype=tf.float64)
input_Y = tf.placeholder(dtype=tf.float64)
layers = [30, 20, 30]
# The network must be built on the input_X placeholder; the original passed
# the builtin `input` function here by mistake.
network = construct_network(input_X, layers)
optimizer_po = construct_train_graph(network, input_Y)
with tf.Session() as sess:
    # NOTE(review): no variable initializer is run before this step; if
    # construct_network creates variables, confirm they are initialized.
    sess.run(optimizer_po, feed_dict={
        input_X: X_train,
        input_Y: y_train
    })


# Estimator path: fit on the training frame for 10 epochs, then predict a
# single pass over the test frame and report the mean squared error.
classifier.fit(
    input_fn=pandas_io.pandas_input_fn(X_train, y_train, num_epochs=10))
preds = list(
    classifier.predict(
        input_fn=pandas_io.pandas_input_fn(X_test, num_epochs=1),
        as_iterable=True))
print(preds)
print(y_test.values)
print(mean_squared_error(y_test.values, preds))