Esempio n. 1
0
    def _test_pandas_input_fn_helper(self, fc_impl, fn_to_run):
        """Builds pandas-backed input functions and passes them to `fn_to_run`.

        Returns immediately, doing nothing, when `HAS_PANDAS` is falsy.

        Args:
          fc_impl: Forwarded unchanged to `fn_to_run` as `fc_impl`.
          fn_to_run: Callable invoked with the train/eval/predict input
            functions plus the dimension and batch-size settings.
        """
        if not HAS_PANDAS:
            return

        # One value is used for both the input and the label dimension.
        label_dimension = 1
        batch_size = 10

        # The same evenly spaced values in [0, 2] feed the single feature
        # column 'x' and the target series.
        values = np.linspace(0., 2., batch_size, dtype=np.float32)
        features = pd.DataFrame({'x': values})
        targets = pd.Series(values)

        make_input_fn = pandas_io.pandas_input_fn
        # Training input: shuffled, with num_epochs=None.
        train_input_fn = make_input_fn(
            x=features,
            y=targets,
            batch_size=batch_size,
            num_epochs=None,
            shuffle=True)
        # Evaluation input: unshuffled, epoch count left at its default.
        eval_input_fn = make_input_fn(
            x=features,
            y=targets,
            batch_size=batch_size,
            shuffle=False)
        # Prediction input: features only, no target.
        predict_input_fn = make_input_fn(
            x=features,
            batch_size=batch_size,
            shuffle=False)

        fn_to_run(
            train_input_fn=train_input_fn,
            eval_input_fn=eval_input_fn,
            predict_input_fn=predict_input_fn,
            input_dimension=label_dimension,
            label_dimension=label_dimension,
            batch_size=batch_size,
            fc_impl=fc_impl)
Esempio n. 2
0
    def test_pandas_input_fn(self):
        """Runs `_test_complete_flow` with input functions built from pandas.

        Returns immediately, doing nothing, when `HAS_PANDAS` is falsy.
        """
        if not HAS_PANDAS:
            return

        input_dimension = 1
        n_classes = 3
        batch_size = 10

        # Evenly spaced values in [0, n_classes - 1] form the single feature
        # column 'x'; the target is whatever `_as_label` derives from them.
        values = np.linspace(0., n_classes - 1., batch_size, dtype=np.float32)
        features = pd.DataFrame({'x': values})
        labels = pd.Series(self._as_label(values))

        make_input_fn = pandas_io.pandas_input_fn
        # Training input: shuffled, with num_epochs=None.
        train_input_fn = make_input_fn(
            x=features,
            y=labels,
            batch_size=batch_size,
            num_epochs=None,
            shuffle=True)
        # Evaluation input: unshuffled, epoch count left at its default.
        eval_input_fn = make_input_fn(
            x=features,
            y=labels,
            batch_size=batch_size,
            shuffle=False)
        # Prediction input: features only, no labels.
        predict_input_fn = make_input_fn(
            x=features,
            batch_size=batch_size,
            shuffle=False)

        self._test_complete_flow(
            train_input_fn=train_input_fn,
            eval_input_fn=eval_input_fn,
            predict_input_fn=predict_input_fn,
            input_dimension=input_dimension,
            n_classes=n_classes,
            batch_size=batch_size)
Esempio n. 3
0
 def testPandasInputFn_NonBoolShuffle(self):
     """Expects a ValueError when `shuffle` is not passed to `pandas_input_fn`.

     Returns immediately, doing nothing, when `HAS_PANDAS` is falsy.
     """
     if not HAS_PANDAS:
         return
     x, _ = self.makeTestDataFrame()
     y_noindex = pd.Series(np.arange(-32, -28))
     # `assertRaisesRegexp` is a deprecated alias that was removed in
     # Python 3.12; `assertRaisesRegex` is the supported spelling.
     with self.assertRaisesRegex(
             ValueError, 'shuffle must be provided and explicitly '
             'set as boolean'):
         # Default shuffle is None
         pandas_io.pandas_input_fn(x, y_noindex)
Esempio n. 4
0
 def testPandasInputFn_IndexMismatch(self):
     """Expects a ValueError when `y` is a Series built without an index.

     Returns immediately, doing nothing, when `HAS_PANDAS` is falsy.
     """
     if not HAS_PANDAS:
         return
     features, _ = self.makeTestDataFrame()
     # No explicit index is given here; presumably it therefore differs
     # from the index `makeTestDataFrame` assigns -- confirm against helper.
     mismatched_target = pd.Series(np.arange(-32, -28))
     options = dict(batch_size=2, shuffle=False, num_epochs=1)
     with self.assertRaises(ValueError):
         pandas_io.pandas_input_fn(features, mismatched_target, **options)
Esempio n. 5
0
    def testPandasInputFn_RaisesWhenTargetColumnIsAList(self):
        """Expects a TypeError when `target_column` is passed as a list.

        Returns immediately, doing nothing, when `HAS_PANDAS` is falsy.
        """
        if not HAS_PANDAS:
            return

        x, y = self.makeTestDataFrame()

        # `assertRaisesRegexp` is a deprecated alias that was removed in
        # Python 3.12; `assertRaisesRegex` is the supported spelling.
        with self.assertRaisesRegex(TypeError,
                                    'target_column must be a string type'):
            pandas_io.pandas_input_fn(x,
                                      y,
                                      batch_size=2,
                                      shuffle=False,
                                      num_epochs=1,
                                      target_column=['one', 'two'])
Esempio n. 6
0
 def trainingInputFunction(self):
     """Returns a shuffled, single-epoch input function over the training data.

     Features come from `self.trainX` and labels from the 'prognosis'
     entry of `self.trainY`; batches hold up to 128 rows.
     """
     features = self.trainX
     labels = self.trainY['prognosis']
     return pandas_input_fn(
         features,
         labels,
         batch_size=128,
         num_epochs=1,
         shuffle=True,
         num_threads=1)
Esempio n. 7
0
 def testPandasInputFn_Idempotent(self):
     """Builds and invokes the input fn repeatedly, unshuffled then shuffled.

     There are no explicit assertions: the test passes as long as none of
     the construct-and-call rounds raises. Returns immediately, doing
     nothing, when `HAS_PANDAS` is falsy.
     """
     if not HAS_PANDAS:
         return
     features, target = self.makeTestDataFrame()
     # Two rounds with shuffle=False, followed by two with shuffle=True.
     for shuffle in (False, True):
         for _ in range(2):
             input_fn = pandas_io.pandas_input_fn(
                 features,
                 target,
                 batch_size=2,
                 shuffle=shuffle,
                 num_epochs=1)
             input_fn()
Esempio n. 8
0
 def testingInputFunction(self):
     """Returns an unshuffled, single-epoch input function over the test data.

     Features come from `self.testX` and labels from the 'prognosis'
     entry of `self.testY`; batches hold up to 128 rows.
     """
     features = self.testX
     labels = self.testY['prognosis']
     return pandas_input_fn(
         features,
         labels,
         batch_size=128,
         num_epochs=1,
         shuffle=False,
         num_threads=1)
Esempio n. 9
0
    def testPandasInputFn_ProducesOutputsForLargeBatchAndMultipleEpochs(self):
        """Checks output when batch_size exceeds the rows across all epochs.

        Two rows are read for two epochs with batch_size=128, so the test
        expects a single batch containing all four elements, followed by
        `OutOfRangeError` on the next run. Returns immediately, doing
        nothing, when `HAS_PANDAS` is falsy.
        """
        if not HAS_PANDAS:
            return
        with self.cached_session() as session:
            index = np.arange(100, 102)
            a = np.arange(2)
            b = np.arange(32, 34)
            x = pd.DataFrame({'a': a, 'b': b}, index=index)
            y = pd.Series(np.arange(-32, -30), index=index)
            input_fn = pandas_io.pandas_input_fn(x,
                                                 y,
                                                 batch_size=128,
                                                 shuffle=False,
                                                 num_epochs=2)

            results = input_fn()

            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(session,
                                                            coord=coord)
            # Stop and join the queue-runner threads even when an assertion
            # fails, so a failing test does not leave them running.
            try:
                features, target = session.run(results)
                self.assertAllEqual(features['a'], [0, 1, 0, 1])
                self.assertAllEqual(features['b'], [32, 33, 32, 33])
                self.assertAllEqual(target, [-32, -31, -32, -31])

                with self.assertRaises(errors.OutOfRangeError):
                    session.run(results)
            finally:
                coord.request_stop()
                coord.join(threads)
Esempio n. 10
0
def func():
    """Trains and evaluates a linear classifier on 'Social_Network_Ads.csv'.

    Columns 1-3 of the loaded frames are used as features ('Gender', 'Age'
    and 'EstimatedSalary' per the feature columns below) and column 4 as
    the label. The function plots the training features, trains for 1000
    steps, prints each test label next to its predicted class, prints the
    evaluation metrics, and finally displays a confusion matrix.
    """
    train_df, test_df = LoadUtil.load_data_df_sk('Social_Network_Ads.csv')
    x_train_df = train_df.iloc[:, 1:4]
    y_train_df = train_df.iloc[:, 4]
    x_test_df = test_df.iloc[:, 1:4]
    y_test_df = test_df.iloc[:, 4]

    PlotUtil.pairplot(x_train_df, hue='Gender')

    # The numeric features are scaled down by fixed divisors (100 for age,
    # 100000 for salary) before reaching the model.
    feature_columns = [
        fc.categorical_column_with_vocabulary_list('Gender', vocabulary_list=['Male', 'Female']),
        fc.numeric_column('Age', dtype=tf.float32, normalizer_fn=lambda x: (x / np.float32(100))),
        fc.numeric_column('EstimatedSalary', dtype=tf.float32, normalizer_fn=lambda x: (x / np.float32(100000)))]

    classifier = tf.estimator.LinearClassifier(feature_columns=feature_columns, n_classes=2, model_dir='logs/')

    train_input_fn = pandas_input_fn(
        x=x_train_df,
        y=y_train_df,
        num_epochs=None,
        shuffle=True)

    classifier.train(train_input_fn, steps=1000)

    test_input_fn = pandas_input_fn(
        x=x_test_df,
        y=y_test_df,
        num_epochs=1,
        shuffle=False)

    y_pred = []
    probabilities = classifier.predict(input_fn=test_input_fn)
    # Pair each test label with its prediction directly instead of indexing
    # by position and calling next() by hand.
    for expected, ret in zip(y_test_df.values, probabilities):
        classes = ret['classes']
        # Extract the single predicted class explicitly: calling int() on
        # an array with ndim > 0 is deprecated since NumPy 1.25.
        # NOTE(review): assumes `classes` is a one-element array -- confirm.
        y_pred.append(int(classes.item()))
        print('test: {}  predict: {}'.format(expected, classes))

    eval_results = classifier.evaluate(input_fn=test_input_fn)
    for key, value in sorted(eval_results.items()):
        print('%s: %s' % (key, value))

    PlotUtil.display_confusion_matrix(y_test_df.values, y_pred)
def func():
    """Trains a linear regressor on '50_Startups.csv' and plots predictions.

    The first four columns of the loaded frames are used as features and
    column 4 as the target. The function plots the training features,
    trains for 100 steps, collects one prediction per test row, and
    displays them against the true values with the y axis labelled
    'Profit'.
    """
    train_df, test_df = LoadUtil.load_data_df_sk('50_Startups.csv')
    train_features = train_df.iloc[:, :4]
    train_target = train_df.iloc[:, 4]
    test_features = test_df.iloc[:, :4]
    test_target = test_df.iloc[:, 4]

    PlotUtil.pairplot(train_features)

    # Three numeric inputs plus one categorical input with a fixed vocabulary.
    numeric_names = ('RnDSpend', 'Administration', 'MarketingSpend')
    feature_columns = [
        fc.numeric_column(name, dtype=tf.float32) for name in numeric_names
    ]
    feature_columns.append(
        fc.categorical_column_with_vocabulary_list(
            'State', vocabulary_list=['New York', 'California', 'Florida']))

    train_input_fn = pandas_input_fn(
        x=train_features, y=train_target, num_epochs=None, shuffle=True)

    linear_est = tf.estimator.LinearRegressor(
        feature_columns=feature_columns, model_dir='logs/')
    linear_est.train(train_input_fn, steps=100)

    test_input_fn = pandas_input_fn(
        x=test_features, y=test_target, num_epochs=1, shuffle=False)
    predictions = linear_est.predict(test_input_fn)

    # Pull exactly one prediction per test row from the prediction iterator.
    y_pred = [
        next(predictions)['predictions']
        for _ in range(len(test_target.values))
    ]

    PlotUtil.display_multiple_linear_result(
        test_target.values, y_pred, x_label='#', y_label='Profit')
Esempio n. 12
0
    def testPandasInputFn_RespectsEpoch_WithShuffle(self):
        """Checks a shuffled, single-epoch input fn with batch_size=4.

        Delegates the check to `assertInputsCallableNTimes` with a count
        of 1. Returns immediately, doing nothing, when `HAS_PANDAS` is
        falsy.
        """
        if not HAS_PANDAS:
            return
        with self.cached_session() as session:
            features, target = self.makeTestDataFrame()
            shuffled_input_fn = pandas_io.pandas_input_fn(
                features,
                target,
                batch_size=4,
                shuffle=True,
                num_epochs=1)

            self.assertInputsCallableNTimes(shuffled_input_fn, session, 1)
Esempio n. 13
0
    def testPandasInputFn_RespectsEpochUnevenBatches(self):
        """Checks a single-epoch, unshuffled input fn with batch_size=3.

        Delegates the check to `assertInputsCallableNTimes` with a count
        of 2. Returns immediately, doing nothing, when `HAS_PANDAS` is
        falsy.
        """
        if not HAS_PANDAS:
            return
        features, target = self.makeTestDataFrame()
        with self.cached_session() as session:
            uneven_input_fn = pandas_io.pandas_input_fn(
                features,
                target,
                batch_size=3,
                shuffle=False,
                num_epochs=1)

            # Before the last batch, only one element of the epoch should
            # remain.
            self.assertInputsCallableNTimes(uneven_input_fn, session, 2)
Esempio n. 14
0
    def testPandasInputFn_ExcludesIndex(self):
        """Checks that the produced features contain no 'index' key.

        Returns immediately, doing nothing, when `HAS_PANDAS` is falsy.
        """
        if not HAS_PANDAS:
            return
        with self.cached_session() as session:
            frame, target = self.makeTestDataFrame()
            input_fn = pandas_io.pandas_input_fn(
                frame,
                target,
                batch_size=2,
                shuffle=False,
                num_epochs=1)

            features, _ = self.callInputFnOnce(input_fn, session)

            has_index_key = 'index' in features
            self.assertFalse(has_index_key)
Esempio n. 15
0
    def testPandasInputFn_OnlyX(self):
        """Checks the first batch of features when no target is supplied.

        With `y=None`, the result of `callInputFnOnce` is used directly as
        the features mapping. Returns immediately, doing nothing, when
        `HAS_PANDAS` is falsy.
        """
        if not HAS_PANDAS:
            return
        with self.cached_session() as session:
            frame, _ = self.makeTestDataFrame()
            features_only_fn = pandas_io.pandas_input_fn(
                frame,
                y=None,
                batch_size=2,
                shuffle=False,
                num_epochs=1)

            features = self.callInputFnOnce(features_only_fn, session)

            # First batch of two rows from each feature column.
            self.assertAllEqual(features['a'], [0, 1])
            self.assertAllEqual(features['b'], [32, 33])
Esempio n. 16
0
 def predictDisease(self, symptomList):
     """Returns the entry of `self.diseases` with the highest predicted logit.

     Args:
       symptomList: One row of values, wrapped into a single-row DataFrame
         whose columns are `self.featureList`.

     Returns:
       `self.diseases[k]`, where `k` is the argmax over the 'logits' entry
       of the first prediction yielded by `self.tree.predict`.
     """
     symptom_frame = pd.DataFrame([symptomList], columns=self.featureList)
     single_row_input_fn = pandas_input_fn(
         symptom_frame,
         None,
         batch_size=1,
         num_epochs=1,
         shuffle=False,
         num_threads=1)
     # Materialise every yielded prediction; only the first one is used.
     predictions = list(
         self.tree.predict(
             single_row_input_fn,
             predict_keys=None,
             hooks=None,
             checkpoint_path=None,
             yield_single_examples=True))
     logits = predictions[0]['logits']
     best_index = int(numpy.argmax(logits))
     return self.diseases[best_index]
Esempio n. 17
0
    def testPandasInputFn_ProducesExpectedOutputs(self):
        """Checks the first batch of features and target for batch_size=2.

        Returns immediately, doing nothing, when `HAS_PANDAS` is falsy.
        """
        if not HAS_PANDAS:
            return
        with self.cached_session() as session:
            frame, series = self.makeTestDataFrame()
            input_fn = pandas_io.pandas_input_fn(
                frame,
                series,
                batch_size=2,
                shuffle=False,
                num_epochs=1)

            features, target = self.callInputFnOnce(input_fn, session)

            # The unshuffled first batch should hold the first two rows.
            self.assertAllEqual(features['a'], [0, 1])
            self.assertAllEqual(features['b'], [32, 33])
            self.assertAllEqual(target, [-32, -31])
Esempio n. 18
0
    def testPandasInputFnYIsDataFrame_HandlesOverlappingColumnsInTargets(self):
        """Checks a DataFrame target whose column name collides with a feature.

        The target columns are renamed to 'a' and 'a_n'; features in this
        test are also read under the key 'a', so the names overlap.
        Returns immediately, doing nothing, when `HAS_PANDAS` is falsy.
        """
        if not HAS_PANDAS:
            return
        with self.cached_session() as session:
            frame, target_frame = self.makeTestDataFrameWithYAsDataFrame()
            renames = {'a_target': 'a', 'b_target': 'a_n'}
            target_frame = target_frame.rename(columns=renames)
            input_fn = pandas_io.pandas_input_fn(
                frame,
                target_frame,
                batch_size=2,
                shuffle=False,
                num_epochs=1)

            features, targets = self.callInputFnOnce(input_fn, session)

            # Features and targets must each keep their own values, even
            # under the shared name 'a'.
            self.assertAllEqual(features['a'], [0, 1])
            self.assertAllEqual(features['b'], [32, 33])
            self.assertAllEqual(targets['a'], [10, 11])
            self.assertAllEqual(targets['a_n'], [50, 51])
Esempio n. 19
0
    def testPandasInputFn_ProducesOutputsWhenDataSizeNotDividedByBatchSize(
            self):
        """Checks batching when the row count is not a multiple of batch_size.

        Five rows are read for one epoch with batch_size=2, so the test
        expects batches of two, two and one elements, followed by
        `OutOfRangeError` on the next run. Returns immediately, doing
        nothing, when `HAS_PANDAS` is falsy.
        """
        if not HAS_PANDAS:
            return
        with self.cached_session() as session:
            index = np.arange(100, 105)
            a = np.arange(5)
            b = np.arange(32, 37)
            x = pd.DataFrame({'a': a, 'b': b}, index=index)
            y = pd.Series(np.arange(-32, -27), index=index)

            input_fn = pandas_io.pandas_input_fn(x,
                                                 y,
                                                 batch_size=2,
                                                 shuffle=False,
                                                 num_epochs=1)

            results = input_fn()

            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(session,
                                                            coord=coord)
            # Stop and join the queue-runner threads even when an assertion
            # fails, so a failing test does not leave them running.
            try:
                # First full batch.
                features, target = session.run(results)
                self.assertAllEqual(features['a'], [0, 1])
                self.assertAllEqual(features['b'], [32, 33])
                self.assertAllEqual(target, [-32, -31])

                # Second full batch.
                features, target = session.run(results)
                self.assertAllEqual(features['a'], [2, 3])
                self.assertAllEqual(features['b'], [34, 35])
                self.assertAllEqual(target, [-30, -29])

                # Final partial batch with the one remaining row.
                features, target = session.run(results)
                self.assertAllEqual(features['a'], [4])
                self.assertAllEqual(features['b'], [36])
                self.assertAllEqual(target, [-28])

                with self.assertRaises(errors.OutOfRangeError):
                    session.run(results)
            finally:
                coord.request_stop()
                coord.join(threads)