def testPandasInputFn(self): if not HAS_PANDAS: return index = np.arange(100, 104) a = np.arange(4) b = np.arange(32, 36) x = pd.DataFrame({'a': a, 'b': b}, index=index) y_noindex = pd.Series(np.arange(-32, -28)) y = pd.Series(np.arange(-32, -28), index=index) with self.test_session() as session: with self.assertRaises(ValueError): failing_input_fn = pandas_io.pandas_input_fn( x, y_noindex, batch_size=2, shuffle=False, num_epochs=1) failing_input_fn() input_fn = pandas_io.pandas_input_fn( x, y, batch_size=2, shuffle=False, num_epochs=1) features, target = input_fn() coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(session, coord=coord) res = session.run([features, target]) self.assertAllEqual(res[0]['index'], [100, 101]) self.assertAllEqual(res[0]['a'], [0, 1]) self.assertAllEqual(res[0]['b'], [32, 33]) self.assertAllEqual(res[1], [-32, -31]) session.run([features, target]) with self.assertRaises(errors.OutOfRangeError): session.run([features, target]) coord.request_stop() coord.join(threads)
def testPandasInputFn_IndexMismatch(self): if not HAS_PANDAS: return x, _ = self.makeTestDataFrame() y_noindex = pd.Series(np.arange(-32, -28)) with self.assertRaises(ValueError): pandas_io.pandas_input_fn( x, y_noindex, batch_size=2, shuffle=False, num_epochs=1)
def testPandasInputFn_Idempotent(self): if not HAS_PANDAS: return x, y = self.makeTestDataFrame() for _ in range(2): pandas_io.pandas_input_fn( x, y, batch_size=2, shuffle=False, num_epochs=1)() for _ in range(2): pandas_io.pandas_input_fn( x, y, batch_size=2, shuffle=True, num_epochs=1)()
def testPandasInputFn_IndexMismatch(self): if not HAS_PANDAS: return x, _ = self.makeTestDataFrame() y_noindex = pd.Series(np.arange(-32, -28)) with self.assertRaises(ValueError): pandas_io.pandas_input_fn(x, y_noindex, batch_size=2, shuffle=False, num_epochs=1)
def testPandasInputFn_ProducesOutputsForLargeBatchAndMultipleEpochs(self): if not HAS_PANDAS: return with self.test_session() as session: index = np.arange(100, 102) a = np.arange(2) b = np.arange(32, 34) x = pd.DataFrame({'a': a, 'b': b}, index=index) y = pd.Series(np.arange(-32, -30), index=index) input_fn = pandas_io.pandas_input_fn(x, y, batch_size=128, shuffle=False, num_epochs=2) results = input_fn() coord = coordinator.Coordinator() threads = queue_runner_impl.start_queue_runners(session, coord=coord) features, target = session.run(results) self.assertAllEqual(features['a'], [0, 1, 0, 1]) self.assertAllEqual(features['b'], [32, 33, 32, 33]) self.assertAllEqual(target, [-32, -31, -32, -31]) with self.assertRaises(errors.OutOfRangeError): session.run(results) coord.request_stop() coord.join(threads)
def testPandasInputFn_Idempotent(self): if not HAS_PANDAS: return x, y = self.makeTestDataFrame() for _ in range(2): pandas_io.pandas_input_fn(x, y, batch_size=2, shuffle=False, num_epochs=1)() for _ in range(2): pandas_io.pandas_input_fn(x, y, batch_size=2, shuffle=True, num_epochs=1)()
def testPandasInputFn_ProducesOutputsForLargeBatchAndMultipleEpochs(self): if not HAS_PANDAS: return with self.test_session() as session: index = np.arange(100, 102) a = np.arange(2) b = np.arange(32, 34) x = pd.DataFrame({'a': a, 'b': b}, index=index) y = pd.Series(np.arange(-32, -30), index=index) input_fn = pandas_io.pandas_input_fn( x, y, batch_size=128, shuffle=False, num_epochs=2) results = input_fn() coord = coordinator.Coordinator() threads = queue_runner_impl.start_queue_runners(session, coord=coord) features, target = session.run(results) self.assertAllEqual(features['a'], [0, 1, 0, 1]) self.assertAllEqual(features['b'], [32, 33, 32, 33]) self.assertAllEqual(target, [-32, -31, -32, -31]) with self.assertRaises(errors.OutOfRangeError): session.run(results) coord.request_stop() coord.join(threads)
def testPandasInputFn_RespectsEpoch_NoShuffle(self): if not HAS_PANDAS: return with self.test_session() as session: x, y = self.makeTestDataFrame() input_fn = pandas_io.pandas_input_fn( x, y, batch_size=4, shuffle=False, num_epochs=1) self.assertInputsCallableNTimes(input_fn, session, 1)
def testPandasInputFn_RespectsEpoch_WithShuffleAutosize(self): if not HAS_PANDAS: return with self.test_session() as session: x, y = self.makeTestDataFrame() input_fn = pandas_io.pandas_input_fn( x, y, batch_size=2, shuffle=True, queue_capacity=None, num_epochs=2) self.assertInputsCallableNTimes(input_fn, session, 4)
def testPandasInputFn_RespectsEpochUnevenBatches(self): if not HAS_PANDAS: return x, y = self.makeTestDataFrame() with self.test_session() as session: input_fn = pandas_io.pandas_input_fn( x, y, batch_size=3, shuffle=False, num_epochs=1) # Before the last batch, only one element of the epoch should remain. self.assertInputsCallableNTimes(input_fn, session, 2)
def testPandasInputFn_ExcludesIndex(self): if not HAS_PANDAS: return with self.test_session() as session: x, y = self.makeTestDataFrame() input_fn = pandas_io.pandas_input_fn( x, y, batch_size=2, shuffle=False, num_epochs=1) features, _ = self.callInputFnOnce(input_fn, session) self.assertFalse('index' in features)
def testPandasInputFn_OnlyX(self): if not HAS_PANDAS: return with self.test_session() as session: x, _ = self.makeTestDataFrame() input_fn = pandas_io.pandas_input_fn( x, y=None, batch_size=2, shuffle=False, num_epochs=1) features = self.callInputFnOnce(input_fn, session) self.assertAllEqual(features['a'], [0, 1]) self.assertAllEqual(features['b'], [32, 33])
def testPandasInputFn_RespectsEpoch_WithShuffle(self): if not HAS_PANDAS: return with self.test_session() as session: x, y = self.makeTestDataFrame() input_fn = pandas_io.pandas_input_fn(x, y, batch_size=4, shuffle=True, num_epochs=1) self.assertInputsCallableNTimes(input_fn, session, 1)
def testPandasInputFn(self): if not HAS_PANDAS: return index = np.arange(100, 104) a = np.arange(4) b = np.arange(32, 36) x = pd.DataFrame({'a': a, 'b': b}, index=index) y_noindex = pd.Series(np.arange(-32, -28)) y = pd.Series(np.arange(-32, -28), index=index) with self.test_session() as session: with self.assertRaises(ValueError): failing_input_fn = pandas_io.pandas_input_fn(x, y_noindex, batch_size=2, shuffle=False, num_epochs=1) failing_input_fn() input_fn = pandas_io.pandas_input_fn(x, y, batch_size=2, shuffle=False, num_epochs=1) features, target = input_fn() coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(session, coord=coord) res = session.run([features, target]) self.assertAllEqual(res[0]['index'], [100, 101]) self.assertAllEqual(res[0]['a'], [0, 1]) self.assertAllEqual(res[0]['b'], [32, 33]) self.assertAllEqual(res[1], [-32, -31]) session.run([features, target]) with self.assertRaises(errors.OutOfRangeError): session.run([features, target]) coord.request_stop() coord.join(threads)
def testPandasInputFn_RespectsEpochUnevenBatches(self): if not HAS_PANDAS: return x, y = self.makeTestDataFrame() with self.test_session() as session: input_fn = pandas_io.pandas_input_fn(x, y, batch_size=3, shuffle=False, num_epochs=1) # Before the last batch, only one element of the epoch should remain. self.assertInputsCallableNTimes(input_fn, session, 2)
def testPandasInputFn_RespectsEpoch_WithShuffleAutosize(self): if not HAS_PANDAS: return with self.cached_session() as session: x, y = self.makeTestDataFrame() input_fn = pandas_io.pandas_input_fn(x, y, batch_size=2, shuffle=True, queue_capacity=None, num_epochs=2) self.assertInputsCallableNTimes(input_fn, session, 4)
def testPandasInputFn_ProducesExpectedOutputs(self): if not HAS_PANDAS: return with self.test_session() as session: x, y = self.makeTestDataFrame() input_fn = pandas_io.pandas_input_fn( x, y, batch_size=2, shuffle=False, num_epochs=1) features, target = self.callInputFnOnce(input_fn, session) self.assertAllEqual(features['a'], [0, 1]) self.assertAllEqual(features['b'], [32, 33]) self.assertAllEqual(target, [-32, -31])
def testPandasInputFn_ExcludesIndex(self): if not HAS_PANDAS: return with self.test_session() as session: x, y = self.makeTestDataFrame() input_fn = pandas_io.pandas_input_fn(x, y, batch_size=2, shuffle=False, num_epochs=1) features, _ = self.callInputFnOnce(input_fn, session) self.assertFalse('index' in features)
def testPandasInputFn_OnlyX(self): if not HAS_PANDAS: return with self.test_session() as session: x, _ = self.makeTestDataFrame() input_fn = pandas_io.pandas_input_fn(x, y=None, batch_size=2, shuffle=False, num_epochs=1) features = self.callInputFnOnce(input_fn, session) self.assertAllEqual(features['a'], [0, 1]) self.assertAllEqual(features['b'], [32, 33])
def testPandasInputFn_ProducesExpectedOutputs(self): if not HAS_PANDAS: return with self.test_session() as session: x, y = self.makeTestDataFrame() input_fn = pandas_io.pandas_input_fn(x, y, batch_size=2, shuffle=False, num_epochs=1) features, target = self.callInputFnOnce(input_fn, session) self.assertAllEqual(features['a'], [0, 1]) self.assertAllEqual(features['b'], [32, 33]) self.assertAllEqual(target, [-32, -31])
def testPandasInputFn_ProducesOutputsWhenDataSizeNotDividedByBatchSize( self): if not HAS_PANDAS: return with self.test_session() as session: index = np.arange(100, 105) a = np.arange(5) b = np.arange(32, 37) x = pd.DataFrame({'a': a, 'b': b}, index=index) y = pd.Series(np.arange(-32, -27), index=index) input_fn = pandas_io.pandas_input_fn(x, y, batch_size=2, shuffle=False, num_epochs=1) results = input_fn() coord = coordinator.Coordinator() threads = queue_runner_impl.start_queue_runners(session, coord=coord) features, target = session.run(results) self.assertAllEqual(features['a'], [0, 1]) self.assertAllEqual(features['b'], [32, 33]) self.assertAllEqual(target, [-32, -31]) features, target = session.run(results) self.assertAllEqual(features['a'], [2, 3]) self.assertAllEqual(features['b'], [34, 35]) self.assertAllEqual(target, [-30, -29]) features, target = session.run(results) self.assertAllEqual(features['a'], [4]) self.assertAllEqual(features['b'], [36]) self.assertAllEqual(target, [-28]) with self.assertRaises(errors.OutOfRangeError): session.run(results) coord.request_stop() coord.join(threads)
def testPandasInputFn_ProducesOutputsWhenDataSizeNotDividedByBatchSize(self): if not HAS_PANDAS: return with self.test_session() as session: index = np.arange(100, 105) a = np.arange(5) b = np.arange(32, 37) x = pd.DataFrame({'a': a, 'b': b}, index=index) y = pd.Series(np.arange(-32, -27), index=index) input_fn = pandas_io.pandas_input_fn( x, y, batch_size=2, shuffle=False, num_epochs=1) results = input_fn() coord = coordinator.Coordinator() threads = queue_runner_impl.start_queue_runners(session, coord=coord) features, target = session.run(results) self.assertAllEqual(features['a'], [0, 1]) self.assertAllEqual(features['b'], [32, 33]) self.assertAllEqual(target, [-32, -31]) features, target = session.run(results) self.assertAllEqual(features['a'], [2, 3]) self.assertAllEqual(features['b'], [34, 35]) self.assertAllEqual(target, [-30, -29]) features, target = session.run(results) self.assertAllEqual(features['a'], [4]) self.assertAllEqual(features['b'], [36]) self.assertAllEqual(target, [-28]) with self.assertRaises(errors.OutOfRangeError): session.run(results) coord.request_stop() coord.join(threads)
X_train = X_train.copy() X_test = X_test.copy() categorical_var_encoders = {} for var in categorical_vars: print var le = LabelEncoder().fit(X[var]) X_train[var + '_ids'] = le.transform(X_train[var]) X_test[var + '_ids'] = le.transform(X_test[var]) X_train.pop(var) X_test.pop(var) categorical_var_encoders[var] = le input_X = tf.placeholder(dtype=tf.float64) input_Y = tf.placeholder(dtype=tf.float64) layers = [30, 20, 30] network = construct_network(input, layers) optimizer_po = construct_train_graph(network, input_Y) with tf.Session() as sess: sess.run(optimizer_po, feed_dict={ input_X: X_train, input_Y: y_train }) classifier.fit(input_fn=pandas_io.pandas_input_fn(X_train, y_train, num_epochs=10)) preds = list(classifier.predict(input_fn=pandas_io.pandas_input_fn(X_test, num_epochs=1), as_iterable=True)) print preds print y_test.values print(mean_squared_error(y_test.values, preds))