Example #1
  def testConfig(self):
    module_path = os.path.join(self.get_temp_dir(), "module")
    export_module_spec(self.spec, module_path)
    text_column = hub.text_embedding_column("text", module_path)
    config = text_column.get_config()
    cloned_text_column = hub.feature_column._TextEmbeddingColumn.from_config(
        config)
    self.assertEqual(cloned_text_column.module_spec_path,
                     text_column.module_spec_path)

    with self.assertRaisesRegexp(NotImplementedError, "Can only generate"):
      text_column = hub.text_embedding_column("text", self.spec)
      config = text_column.get_config()
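The self.spec fixture these tests rely on is not shown. A minimal stand-in, assuming the TF1-style hub API, can be built with hub.create_module_spec; the 4-dimensional toy embedding below is purely illustrative:

import tensorflow as tf
import tensorflow_hub as hub

def _text_module_fn():
    # Toy embedding: hash each input string into one of 10 buckets and
    # look up a trainable 4-dimensional vector for it.
    text = tf.placeholder(dtype=tf.string, shape=[None])
    ids = tf.strings.to_hash_bucket_fast(text, 10)
    table = tf.get_variable("table", shape=[10, 4])
    hub.add_signature(inputs=text, outputs=tf.nn.embedding_lookup(table, ids))

spec = hub.create_module_spec(_text_module_fn)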
Example #2
 def testInputLayer(self):
   features = {
       "text_a": ["hello world", "pair-programming"],
       "text_b": ["hello world", "oov token"],
   }
   feature_columns = [
       hub.text_embedding_column("text_a", self.spec, trainable=False),
       hub.text_embedding_column("text_b", self.spec, trainable=False),
   ]
   with tf.Graph().as_default():
     input_layer = tf.feature_column.input_layer(features, feature_columns)
     with tf.train.MonitoredSession() as sess:
       output = sess.run(input_layer)
       self.assertAllEqual(output, [[1, 2, 3, 4, 1, 2, 3, 4],
                                    [5, 5, 5, 5, 0, 0, 0, 0]])
Example #3
def build_model(directory="train_data.csv"):
    train_df, test_df = load_directory_data_(directory)
    # Training input on the whole training set with no limit on training epochs.
    train_input_fn = tf.estimator.inputs.pandas_input_fn(
        train_df, train_df["label"], num_epochs=None, shuffle=True)

    # Prediction on the whole training set.
    predict_train_input_fn = tf.estimator.inputs.pandas_input_fn(
        train_df, train_df["label"], shuffle=False)
    # Prediction on the test set.
    predict_test_input_fn = tf.estimator.inputs.pandas_input_fn(
        test_df, test_df["label"], shuffle=False)

    tf.logging.info("loading embeddings..")
    embedded_text_feature_column = hub.text_embedding_column(
        key="sentence",
        module_spec="https://tfhub.dev/google/nnlm-en-dim128/1")
    tf.logging.info("finished loading embeddings...")

    estimator = tf.estimator.DNNClassifier(
        hidden_units=[500, 500],
        feature_columns=[embedded_text_feature_column],
        n_classes=5,
        optimizer=tf.train.AdamOptimizer(learning_rate=0.003),
        model_dir=MODEL_DIR)

    estimator.train(input_fn=train_input_fn, steps=250000)

    train_eval_result = estimator.evaluate(input_fn=predict_train_input_fn)
    test_eval_result = estimator.evaluate(input_fn=predict_test_input_fn)

    tf.logging.info("Training set accuracy: {accuracy}".format(**train_eval_result))
    tf.logging.info("Test set accuracy: {accuracy}".format(**test_eval_result))
Example #4
def train_and_evaluate_with_module(hub_module,
                                   train_input_fn,
                                   predict_train_input_fn,
                                   predict_dev_input_fn,
                                   train_module=False):
    embedded_text_feature_column = hub.text_embedding_column(
        key="sentence", module_spec=hub_module, trainable=train_module)

    estimator = tf.estimator.DNNClassifier(
        hidden_units=[500, 100],
        feature_columns=[embedded_text_feature_column],
        n_classes=N_CLASSES,
        optimizer=tf.train.AdagradOptimizer(learning_rate=0.003))

    estimator.train(input_fn=train_input_fn, steps=1000)

    train_eval_result = estimator.evaluate(input_fn=predict_train_input_fn)
    dev_eval_result = estimator.evaluate(input_fn=predict_dev_input_fn)
    training_set_accuracy = train_eval_result["accuracy"]
    dev_set_accuracy = dev_eval_result["accuracy"]

    return {
        "training accuracy": training_set_accuracy,
        "development accuracy": dev_set_accuracy
    }
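A minimal call site for the helper above, assuming the input functions were built with tf.estimator.inputs.pandas_input_fn as in the neighboring examples (the module URL is illustrative):

results = train_and_evaluate_with_module(
    "https://tfhub.dev/google/nnlm-en-dim128/1",
    train_input_fn, predict_train_input_fn, predict_dev_input_fn,
    train_module=False)
print(results["training accuracy"], results["development accuracy"])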
Example #5
  def testDenseFeatures_shareAcrossApplication(self):
    features = {
        "text": ["hello world", "pair-programming"],
    }
    feature_columns = [
        hub.text_embedding_column("text", self.spec, trainable=True),
    ]
    if not feature_column_v2.is_feature_column_v2(feature_columns):
      self.skipTest("Resources not implemented in the state manager of feature "
                    "column v2.")
    with tf.Graph().as_default():
      # We want to test with dense_features_v2.DenseFeatures. This symbol was
      # added in https://github.com/tensorflow/tensorflow/commit/64586f18724f737393071125a91b19adf013cf8a.
      feature_layer = tf.compat.v2.keras.layers.DenseFeatures(feature_columns)
      feature_layer_out_1 = feature_layer(features)
      feature_layer_out_2 = feature_layer(features)

      # We define loss only on the first layer. Since layers should have shared
      # weights, we expect the second layer will change too.
      loss = feature_layer_out_1 - tf.constant(0.005)
      optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.7)
      train_op = optimizer.minimize(loss)

      with tf.compat.v1.train.MonitoredSession() as sess:
        before_update_1 = sess.run(feature_layer_out_1)
        sess.run(train_op)
        after_update_1 = sess.run(feature_layer_out_1)
        after_update_2 = sess.run(feature_layer_out_2)

        self.assertAllEqual(before_update_1, [[1, 2, 3, 4],
                                              [5, 5, 5, 5]])
        self.assertAllEqual(after_update_1, after_update_2)
Example #6
    def testTrainableEmbeddingColumn(self):
        feature_columns = [
            hub.text_embedding_column("text", self.spec, trainable=True),
        ]

        with tf.Graph().as_default():
            features = {
                "text": ["hello world", "pair-programming"],
            }
            target = [[1, 1, 1, 1], [4, 3, 2, 1]]
            input_layer = tf_v1.feature_column.input_layer(
                features, feature_columns)

            loss = tf.cast(
                tf_v1.losses.mean_squared_error(input_layer, target),
                tf.float64)
            optimizer = tf_v1.train.GradientDescentOptimizer(
                learning_rate=0.97)
            train_op = optimizer.minimize(loss)

            with tf_v1.train.MonitoredSession() as sess:
                self.assertAllEqual(sess.run(input_layer),
                                    [[1, 2, 3, 4], [5, 5, 5, 5]])
                for _ in range(10):
                    sess.run(train_op)
                self.assertAllClose(sess.run(input_layer), target, atol=0.5)
Example #7
def train_classifier_idealist(X_train, path=None):
    train_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(
        X_train, X_train["Spam"], num_epochs=None, shuffle=True)

    embedded_text_feature_column = hub.text_embedding_column(
        key="DESCRIPTION",
        module_spec="https://tfhub.dev/google/nnlm-en-dim128/1")
    estimator = tf.compat.v1.estimator.DNNClassifier(
        hidden_units=[500, 100],
        feature_columns=[embedded_text_feature_column],
        n_classes=2,
        model_dir=path,
        optimizer=tf.compat.v1.train.AdagradOptimizer(learning_rate=0.003))
    estimator.train(input_fn=train_input_fn, steps=100)
    if path is not None:
        try:
            serving_input_fn = tf.compat.v1.estimator.export.build_parsing_serving_input_receiver_fn(
                tf.feature_column.make_parse_example_spec(
                    [embedded_text_feature_column]))
            #export_path = estimator.export_saved_model(path, serving_input_fn)
            export_path = estimator.latest_checkpoint()
            # Pickle the checkpoint path so the model can be reloaded later.
            with open(path + "/loadPath.txt", "wb") as fp:
                pickle.dump(export_path, fp)
        except Exception:
            print("Could not save USE for ", path)
    return estimator
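A hypothetical counterpart that reloads what train_classifier_idealist persisted: unpickle the checkpoint path and warm-start a fresh classifier from it (the function name is ours, not from the original source):

def load_classifier_idealist(path):
    # Recover the checkpoint path pickled by train_classifier_idealist.
    with open(path + "/loadPath.txt", "rb") as fp:
        checkpoint_path = pickle.load(fp)
    embedded_text_feature_column = hub.text_embedding_column(
        key="DESCRIPTION",
        module_spec="https://tfhub.dev/google/nnlm-en-dim128/1")
    # warm_start_from restores the trained weights into the new estimator.
    return tf.compat.v1.estimator.DNNClassifier(
        hidden_units=[500, 100],
        feature_columns=[embedded_text_feature_column],
        n_classes=2,
        optimizer=tf.compat.v1.train.AdagradOptimizer(learning_rate=0.003),
        warm_start_from=checkpoint_path)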
Example #8
  def testDenseFeatures_shareAcrossApplication(self):
    features = {
        "text": ["hello world", "pair-programming"],
    }
    feature_columns = [
        hub.text_embedding_column("text", self.spec, trainable=True),
    ]
    if not feature_column_v2.is_feature_column_v2(feature_columns):
      self.skipTest("Resources not implemented in the state manager of feature "
                    "column v2.")
    with tf.Graph().as_default():
      feature_layer = _dense_features_module.DenseFeatures(feature_columns)
      feature_layer_out_1 = feature_layer(features)
      feature_layer_out_2 = feature_layer(features)

      # We define loss only on the first layer. Since layers should have shared
      # weights, we expect the second layer will change too.
      loss = feature_layer_out_1 - tf.constant(0.005)
      optimizer = tf_v1.train.GradientDescentOptimizer(learning_rate=0.7)
      train_op = optimizer.minimize(loss)

      with tf_v1.train.MonitoredSession() as sess:
        before_update_1 = sess.run(feature_layer_out_1)
        sess.run(train_op)
        after_update_1 = sess.run(feature_layer_out_1)
        after_update_2 = sess.run(feature_layer_out_2)

        self.assertAllEqual(before_update_1, [[1, 2, 3, 4],
                                              [5, 5, 5, 5]])
        self.assertAllEqual(after_update_1, after_update_2)
Example #9
 def testMakeParseExampleSpec(self):
     text_column = hub.text_embedding_column("text",
                                             self.spec,
                                             trainable=False)
     parsing_spec = tf.feature_column.make_parse_example_spec([text_column])
     self.assertEqual(parsing_spec,
                      {"text": tf.FixedLenFeature([1], dtype=tf.string)})
Example #10
    def tensorflowPredict(self, description):
        
        descriptions = []
        descriptions.append(description)
        embedded_text_feature_column = hub.text_embedding_column(
            key="name_and_description",
            module_spec="https://tfhub.dev/google/nnlm-en-dim128/1")

        estimator = tf.estimator.DNNClassifier(
            hidden_units=[500, 100],
            model_dir='tensorflowmodel',
            feature_columns=[embedded_text_feature_column],
            n_classes=2,
            optimizer=tf.compat.v1.train.AdagradOptimizer(learning_rate=0.05))

        predict_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
            {"name_and_description": np.array(descriptions).astype(str)},
            shuffle=False)

        results = estimator.predict(
            predict_input_fn
        )
        # for result in results:
            # print(result['class_ids'][0])
            # top_2 = result['probabilities'].argsort()[-2:][::-1]
            # for genre in top_2:
            #     print('result ' + ': ' + str(round(result['probabilities'][genre] * 100, 2)) + '%')
            # print('')
            # print(result)
        predictions = np.array([item['class_ids'][0] for item in results])

        return "Prediction: {}".format(str(predictions))
Example #11
  def testWorksWithCannedEstimator(self):
    comment_embedding_column = hub.text_embedding_column(
        "comment", self.spec, trainable=False)
    upvotes = tf.feature_column.numeric_column("upvotes")

    feature_columns = [comment_embedding_column, upvotes]
    estimator = tf.estimator.DNNClassifier(
        hidden_units=[10],
        feature_columns=feature_columns,
        model_dir=self.get_temp_dir())

    # This only tests that estimator apis are working with the feature
    # column without throwing exceptions.
    features = {
        "comment": np.array([
            ["the quick brown fox"],
            ["spam spam spam"],
        ]),
        "upvotes": np.array([
            [20],
            [1],
        ]),
    }
    labels = np.array([[1], [0]])
    input_fn = tf.estimator.inputs.numpy_input_fn(
        features, labels, shuffle=True)
    estimator.train(input_fn, max_steps=1)
    estimator.evaluate(input_fn, steps=1)
    estimator.predict(input_fn)
Example #12
def predict(X_test):
    # ...
    dataKey = 'Question'
    labelKey = 'y'
    full_model_dir = "/home/sbs/Desktop/Dev/ChatBot/EstimatorModels"
    full_model_dir = sorted(glob.glob(os.path.join(full_model_dir, '*/')),
                            key=os.path.getmtime)[-1]
    embedded_text_url = "https://tfhub.dev/google/nnlm-en-dim128/1"
    embedded_text_feature_column = hub.text_embedding_column(
        key=dataKey, module_spec=embedded_text_url)
    # ...
    with tf.Session() as sess:
        tf.saved_model.loader.load(sess,
                                   [tf.saved_model.tag_constants.SERVING],
                                   full_model_dir)
        predictor = tf.contrib.predictor.from_saved_model(full_model_dir)
        # Prediction on the test set.
        predict_test_input_fn = tf.estimator.inputs.pandas_input_fn(
            X_test, X_test[labelKey], shuffle=True)

        model_input = tf.train.Example(features=predict_test_input_fn())
        model_input = model_input.SerializeToString()
        output_dict = predictor({"y": [model_input]})
        # y_predicted = output_dict["pred_output_classes"][0]
        print(output_dict)
Example #13
def train_and_evaluate_with_module(hub_module, train_module=False):
    """
    train and load Training accuracy and Test accuracy

    parameter: hub_model - check from https://www.tensorflow.org/resources/models-datasets
               train_model - whether to retrain pretrained model
    """

    embedded_text_feature_column = hub.text_embedding_column(
        key="text", module_spec=hub_module, trainable=train_module)

    estimator = tf.estimator.DNNClassifier(
        hidden_units=[500, 100],
        feature_columns=[embedded_text_feature_column],
        n_classes=2,
        optimizer=tf.train.AdagradOptimizer(learning_rate=0.003))

    estimator.train(input_fn=train_input_fn, steps=1000)

    train_eval_result = estimator.evaluate(input_fn=predict_train_input_fn)
    test_eval_result = estimator.evaluate(input_fn=predict_test_input_fn)

    training_set_accuracy = train_eval_result["accuracy"]
    test_set_accuracy = test_eval_result["accuracy"]

    return {
        "Training accuracy": training_set_accuracy,
        "Test accuracy": test_set_accuracy
    }
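A typical driver for this helper, in the style of the TF Hub text-classification tutorial, collects one result dict per module configuration (URLs illustrative):

results = {}
results["nnlm-en-dim128"] = train_and_evaluate_with_module(
    "https://tfhub.dev/google/nnlm-en-dim128/1")
results["nnlm-en-dim128-with-training"] = train_and_evaluate_with_module(
    "https://tfhub.dev/google/nnlm-en-dim128/1", train_module=True)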
Example #14
    def _model_fn(self, features, labels, mode, params, config):
        embedded_text_feature_column = hub.text_embedding_column(
            key=base_model.TEXT_FEATURE_KEY,
            module_spec=FLAGS.model_spec,
            trainable=FLAGS.trainable)
        inputs = tf.feature_column.input_layer(features,
                                               [embedded_text_feature_column])

        batch_size = tf.shape(inputs)[0]

        logits = inputs
        for num_units in params.dense_units:
            logits = tf.layers.dense(inputs=logits,
                                     units=num_units,
                                     activation=tf.nn.relu)
            logits = tf.layers.dropout(logits, rate=params.dropout_rate)
        logits = tf.layers.dense(inputs=logits,
                                 units=len(self._target_labels),
                                 activation=None)

        output_heads = [
            tf.contrib.estimator.binary_classification_head(
                name=name, weight_column=name + '_weight')
            for name in self._target_labels
        ]
        multihead = tf.contrib.estimator.multi_head(output_heads)

        optimizer = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
        return multihead.create_estimator_spec(features=features,
                                               labels=labels,
                                               mode=mode,
                                               logits=logits,
                                               optimizer=optimizer)
Example #15
def object_recognition_classifier(number_of_classes, path_test_dataset,
                                  path_model, path_module, reject,
                                  path_predicted_labels):

    test_df = download_and_load_datasets_test(path_test_dataset)
    # Prediction on the test set.
    predict_test_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(
        test_df, test_df["polarity"], shuffle=False)

    embedded_text_feature_column = hub.text_embedding_column(
        key="coordonates", module_spec=path_module)

    # warm_start_from restores the trained weights from path_model.
    estimator_loaded = tf.estimator.DNNClassifier(
        hidden_units=[500, 100],
        feature_columns=[embedded_text_feature_column],
        n_classes=number_of_classes,
        warm_start_from=path_model)

    tab_predictions, tab_probabilities = best_classes_and_probabilities(
        estimator_loaded, predict_test_input_fn)

    tab_predictions_with_reject = ambiguity_reject(tab_predictions,
                                                   tab_probabilities, reject,
                                                   number_of_classes)

    with open(path_predicted_labels, "w") as trgt_predict_txt:
        trgt_predict_txt.write(str(tab_predictions_with_reject))
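The best_classes_and_probabilities helper is not shown; a hypothetical implementation that returns, for each test example, the argmax class and its probability (assuming numpy as np, as elsewhere here):

def best_classes_and_probabilities(estimator, input_fn):
    predictions, probabilities = [], []
    for p in estimator.predict(input_fn=input_fn):
        best = int(np.argmax(p["probabilities"]))
        predictions.append(best)
        probabilities.append(float(p["probabilities"][best]))
    return predictions, probabilities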
Example #16
def train_and_evaluate_with_module(hub_module,
                                   train_module=False,
                                   rate=0.003,
                                   steps=1000,
                                   hunits=[500, 100],
                                   nc=2):
    start_time = time.time()
    embedded_text_feature_column = hub.text_embedding_column(
        key="content", module_spec=hub_module, trainable=train_module)

    estimator = tf.estimator.DNNClassifier(
        hidden_units=hunits,
        feature_columns=[embedded_text_feature_column],
        n_classes=nc,
        optimizer=tf.train.AdagradOptimizer(learning_rate=rate))

    estimator.train(input_fn=train_input_fn, steps=steps)

    train_eval_result = estimator.evaluate(input_fn=predict_train_input_fn)
    test_eval_result = estimator.evaluate(input_fn=predict_test_input_fn)

    training_set_accuracy = train_eval_result["accuracy"]
    test_set_accuracy = test_eval_result["accuracy"]

    return {
        "Training accuracy": training_set_accuracy,
        "Test accuracy": test_set_accuracy,
        "Learning rate": rate,
        "Steps": steps,
        "n classes": nc,
        "Hidden units": hunits,
        "Time elapsed": round(time.time() - start_time),
    }
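A hypothetical sweep over learning rates using the helper above; the module URL is illustrative, and the input functions are assumed to exist at module level, as in the function body:

for rate in (0.003, 0.01, 0.03):
    print(train_and_evaluate_with_module(
        "https://tfhub.dev/google/nnlm-en-dim128/1", rate=rate, steps=500))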
Example #17
def train_and_evaluate_with_sentence_encoder(hub_module, train_module=False, path=''):
    embedding_feature = hub.text_embedding_column(
        key='sentence', module_spec=hub_module, trainable=train_module)
  
    print('Training with', hub_module)
    print('Trainable is:', train_module)
  
    dnn = tf.estimator.DNNClassifier(
        hidden_units=[512, 128],
        feature_columns=[embedding_feature],
        n_classes=2,
        activation_fn=tf.nn.relu,
        dropout=0.1,
        optimizer=tf.train.AdagradOptimizer(learning_rate=0.005),
        model_dir=path,
        config=my_checkpointing_config)

    for step in range(0, TOTAL_STEPS+1, STEP_SIZE):
        print('Training for step =', step)
        dnn.train(input_fn=train_input_fn, steps=STEP_SIZE)
        print('Eval Metrics (Train):', dnn.evaluate(input_fn=predict_train_input_fn))
        print('Eval Metrics (Validation):', dnn.evaluate(input_fn=predict_val_input_fn))
        print('\n')
    
    predictions_train = get_predictions(estimator=dnn, input_fn=predict_train_input_fn)
    predictions_dev = get_predictions(estimator=dnn, input_fn=predict_test_input_fn)
    return predictions_train, predictions_dev, dnn
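This function leans on module-level globals that are not shown (my_checkpointing_config, TOTAL_STEPS, STEP_SIZE, the input functions, and get_predictions). A plausible setup for the first three:

my_checkpointing_config = tf.estimator.RunConfig(
    save_checkpoints_steps=500,  # checkpoint every 500 training steps
    keep_checkpoint_max=5)       # retain only the 5 most recent checkpoints
TOTAL_STEPS = 1500
STEP_SIZE = 500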
Example #18
    def test(self, test, version):

        disable_gpu()
        test_utterances = test['utterance'].astype('str')
        test_intents = test['intent']

        test['predict_intent'] = ''
        test['match'] = 0

        encoder = LabelEncoder()
        encoder.fit(test_intents)
        num_classes = len(encoder.classes_)

        embeddings = hub.text_embedding_column(
            'utterance',
            module_spec=MODULE_SPEC[self.embedding],
            trainable=False)

        multi_class_head = tf.contrib.estimator.multi_class_head(
            num_classes,
            loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE)

        estimator = tf.contrib.estimator.DNNEstimator(
            head=multi_class_head,
            hidden_units=[64, 10],
            model_dir='models/tf/benchmark/' + self.embedding + '/' + version,
            feature_columns=[embeddings])

        predict_input_fn = tf.estimator.inputs.numpy_input_fn(
            {"utterance": np.array(test_utterances).astype(str)},
            shuffle=False)
        results = estimator.predict(predict_input_fn)

        index = 0
        total = len(test)
        predict_intent_idx = test.columns.get_loc('predict_intent')
        match_idx = test.columns.get_loc('match')
        # Display predictions
        for result in results:
            idx = np.argmax(result['probabilities'])
            intent = encoder.classes_[idx]
            row = test.iloc[index]
            test.iat[index, predict_intent_idx] = intent
            if row['intent'] == intent:
                test.iat[index, match_idx] = 1
            index += 1
            printProgress(index, total)

        # Percentage of correct predictions
        missed = test[test['match'] == 0]
        accuracy = 100 * (1 - len(missed) / len(test))
        print(
            'DNN NLU scores %0.2f%% with %d false predictions in total %d samples'
            % (accuracy, len(missed), len(test)))
        save_csv(missed,
                 'missed/' + version + '/' + self.embedding + '.tf.csv')
        result = test['match'].value_counts()
        return result
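The MODULE_SPEC lookup used above maps an embedding name to a TF Hub handle; a hypothetical registry might look like:

MODULE_SPEC = {
    "nnlm-en-dim128": "https://tfhub.dev/google/nnlm-en-dim128/1",
    "universal-sentence-encoder": "https://tfhub.dev/google/universal-sentence-encoder/2",
}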
Example #19
def classify(train_df: pd.DataFrame, test_df: pd.DataFrame):
    # Training input on the whole training set with no limit on training epochs.
    train_input_fn = tf.estimator.inputs.pandas_input_fn(train_df,
                                                         train_df["class"],
                                                         num_epochs=50,
                                                         shuffle=True)

    # Prediction on the whole training set.
    predict_train_input_fn = tf.estimator.inputs.pandas_input_fn(
        train_df, train_df["class"], shuffle=False)
    # Prediction on the test set.
    predict_test_input_fn = tf.estimator.inputs.pandas_input_fn(
        test_df, test_df["class"], shuffle=False)

    print("Download pretrained model")
    embed = hub.text_embedding_column(
        key="sentence",
        module_spec="https://tfhub.dev/google/nnlm-de-dim128/1",
        trainable=True)

    print("Train estimator")
    estimator = tf.estimator.DNNClassifier(
        hidden_units=[500, 100],
        feature_columns=[embed],
        n_classes=3,
        dropout=0.6,
        optimizer=tf.train.AdagradOptimizer(learning_rate=0.003))

    estimator.train(input_fn=train_input_fn)

    print("Evaluate estimator")
    train_eval_result = estimator.evaluate(input_fn=predict_train_input_fn)
    test_eval_result = estimator.evaluate(input_fn=predict_test_input_fn)

    training_set_accuracy = train_eval_result["accuracy"]
    test_set_accuracy = test_eval_result["accuracy"]

    print("Training set accuracy: ")
    print(training_set_accuracy)
    print("Test set accuracy: ")
    print(test_set_accuracy)

    LABELS = ["positive", "negative", "neutral"]

    # Create a confusion matrix on training data.
    with tf.Graph().as_default():
        cm = tf.confusion_matrix(
            test_df["class"], get_predictions(estimator,
                                              predict_test_input_fn))
        with tf.Session() as session:
            cm_out = session.run(cm)

    # Normalize the confusion matrix so that each row sums to 1.
    cm_out = cm_out.astype(float) / cm_out.sum(axis=1)[:, np.newaxis]

    sns.heatmap(cm_out, annot=True, xticklabels=LABELS, yticklabels=LABELS)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.show()
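classify calls a get_predictions helper that is not included here; in the TensorFlow Hub text-classification tutorial it is essentially:

def get_predictions(estimator, input_fn):
    # Collect the predicted class id for every example.
    return [p["class_ids"][0] for p in estimator.predict(input_fn=input_fn)]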
Example #20
 def testFeatureColumnsWithResources(self, mock_add_resource):
     feature_column = hub.text_embedding_column("text_a", self.spec)
     if not isinstance(feature_column, feature_column_v2.FeatureColumn):
         self.skipTest(
             "Resources not implemented in the state manager of feature "
             "column v2.")
     self.assertTrue(
         feature_column_v2.is_feature_column_v2([feature_column]))
Example #21
def main():
    tf.logging.set_verbosity(tf.logging.ERROR)

    t = time.time()
    train_df, test_df = download_and_load_datasets()
    train_df.head()
    print("Data Loaded, time: %.5fs" % (time.time() - t))

    # Training input on the whole training set with no limit on training epochs.
    train_input_fn = tf.estimator.inputs.pandas_input_fn(train_df,
                                                         train_df["polarity"],
                                                         batch_size=128,
                                                         num_epochs=5,
                                                         shuffle=True)

    # Prediction on the whole training set.
    predict_train_input_fn = tf.estimator.inputs.pandas_input_fn(
        train_df, train_df["polarity"], batch_size=128, shuffle=False)
    # Prediction on the test set.
    predict_test_input_fn = tf.estimator.inputs.pandas_input_fn(
        test_df, test_df["polarity"], batch_size=128, shuffle=False)

    embedded_text_feature_column = hub.text_embedding_column(
        key="sentence",
        module_spec="https://tfhub.dev/google/nnlm-en-dim128/1")
    print("Sentences embedded, time: %.5fs" % (time.time() - t))

    estimator = tf.estimator.DNNClassifier(
        hidden_units=[500, 100],
        feature_columns=[embedded_text_feature_column],
        n_classes=2,
        optimizer=tf.train.AdagradOptimizer(learning_rate=0.003))
    """### Training

    Train the estimator for a reasonable amount of steps.
    """

    print("Train")
    # Reduce logging output.
    #tf.logging.set_verbosity(tf.logging.INFO)
    # Training for 1,000 steps means 128,000 training examples with the default
    # batch size. This is roughly equivalent to 5 epochs since the training dataset
    # contains 25,000 examples.
    estimator.train(input_fn=train_input_fn, steps=1000)
    # Reduce logging output.
    #tf.logging.set_verbosity(tf.logging.ERROR)
    print("Training complete, time: %.5fs" % (time.time() - t))
    """# Prediction

    Run predictions for both training and test set.
    """

    train_eval_result = estimator.evaluate(input_fn=predict_train_input_fn)
    test_eval_result = estimator.evaluate(input_fn=predict_test_input_fn)

    print("Training set accuracy: {accuracy}".format(**train_eval_result))
    print("Test set accuracy: {accuracy}".format(**test_eval_result))
Example #22
 def testDenseFeatures(self):
   features = {
       "text_a": ["hello world", "pair-programming"],
       "text_b": ["hello world", "oov token"],
   }
   feature_columns = [
       hub.text_embedding_column("text_a", self.spec, trainable=False),
       hub.text_embedding_column("text_b", self.spec, trainable=False),
   ]
   if not feature_column_v2.is_feature_column_v2(feature_columns):
     self.skipTest("Resources not implemented in the state manager of feature "
                   "column v2.")
   with tf.Graph().as_default():
     feature_layer = _dense_features_module.DenseFeatures(feature_columns)
     feature_layer_out = feature_layer(features)
     with tf_v1.train.MonitoredSession() as sess:
       output = sess.run(feature_layer_out)
       self.assertAllEqual(
           output, [[1, 2, 3, 4, 1, 2, 3, 4], [5, 5, 5, 5, 0, 0, 0, 0]])
Example #23
def save(estimator, text_embedding_module, export_dir_base):
    embedded_text_feature_column = hub.text_embedding_column(
        key="sentence", module_spec=text_embedding_module)
    feature_columns = [embedded_text_feature_column]
    feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    estimator.export_savedmodel(
        export_dir_base=export_dir_base,
        serving_input_receiver_fn=serving_input_receiver_fn)
Example #24
def create(text_embedding_module, model_dir):
    embedded_text_feature_column = hub.text_embedding_column(
        key="sentence", module_spec=text_embedding_module)

    return tf.estimator.DNNClassifier(
        hidden_units=[500, 100],
        feature_columns=[embedded_text_feature_column],
        n_classes=2,
        optimizer=tf.train.AdagradOptimizer(learning_rate=0.003),
        model_dir=model_dir)
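Examples #23 and #24 compose naturally: build the classifier, train it, then export it for serving. A minimal sketch (paths and train_input_fn are illustrative):

module = "https://tfhub.dev/google/nnlm-en-dim128/1"
estimator = create(module, model_dir="/tmp/text_model")
estimator.train(input_fn=train_input_fn, steps=1000)  # assumes a pandas_input_fn as above
save(estimator, module, export_dir_base="/tmp/text_model/export")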
Example #25
    def fit(self, X, y=None):

        embedded_text_feature_column = hub.text_embedding_column(
            key=self.text_feature_key,
            module_spec=self.tf_hub_module,
            trainable=self.trainable)

        # train_input_fn = tf.estimator.inputs.pandas_input_fn(dfs['description'].to_frame(), dfs["label"], batch_size=1200, num_epochs=None, shuffle=False)
        train_input_fn = tf.estimator.inputs.pandas_input_fn(
            X, y, batch_size=self.batch_size, num_epochs=None, shuffle=True)

        optimizer = tf.train.ProximalAdagradOptimizer(
            learning_rate=self.learning_rate,
            l1_regularization_strength=self.l1_regularization_strength,
            l2_regularization_strength=self.l2_regularization_strength)
        activation = {
            'relu': tf.nn.relu,
            'tanh': tf.nn.tanh,
            'sigmoid': tf.nn.sigmoid,
            'elu': tf.nn.elu,
            'softplus': tf.nn.softplus,
            'softsign': tf.nn.softsign,
            'relu6': tf.nn.relu6
        }
        path = ''.join(random.choice('0123456789ABCDEF') for i in range(16))
        path = '../' + path
        self.model_dir = path
        os.mkdir(self.model_dir)
        print(self.model_dir)
        #run_config = tf.estimator.RunConfig(save_summary_steps=None, save_checkpoints_secs=None)
        self.estimator = tf.estimator.DNNClassifier(
            hidden_units=self.hidden_units,
            feature_columns=[embedded_text_feature_column],
            n_classes=self.num_classes,
            optimizer=optimizer,
            dropout=self.dropout,
            batch_norm=self.batch_norm,
            activation_fn=activation[self.activation_fn],
            #config=run_config,
            model_dir=self.model_dir)
        path = ''.join(random.choice('0123456789ABCDEF') for i in range(16))
        self.eval_path = './' + path
        os.mkdir(self.eval_path)
        print(self.eval_path)
        early_stopping = tf.contrib.estimator.stop_if_no_decrease_hook(
            self.estimator,
            metric_name='loss',
            max_steps_without_decrease=400,
            min_steps=100,
            eval_dir=self.eval_path)

        self.estimator.train(input_fn=train_input_fn,
                             steps=self.training_steps)
        tf.reset_default_graph()
        return self
Example #26
def create_feature_columns(hparams):

    title_embedding_column = hub.text_embedding_column(
        "title", "https://tfhub.dev/google/universal-sentence-encoder/1")

    feature_columns = [title_embedding_column]

    print("feature columns: \n {}".format(feature_columns))
    print("")

    return feature_columns
Example #27
    def run(self, debug=False):

        # Reduce logging output.
        if debug:
            tf.logging.set_verbosity(tf.logging.ERROR)
        self.data_train.head()

        # Training input on the whole training set with no limit on training epochs.
        train_input_fn = tf.estimator.inputs.pandas_input_fn(
            x=self.data_train,
            y=self.data_train[self.labelKey],
            num_epochs=None,
            shuffle=True)

        # Prediction on the whole training set.
        predict_train_input_fn = tf.estimator.inputs.pandas_input_fn(
            self.data_train, self.data_train[self.labelKey], shuffle=True)

        embedded_text_feature_column = hub.text_embedding_column(
            key=self.dataKey, module_spec=self.embeded_text_url)
        #print self.data_train
        #print len(np.unique(self.data_train[self.labelKey]))
        my_checkpointing_config = tf.estimator.RunConfig(
            save_checkpoints_secs=10 * 60,  # Save checkpoints every 10 minutes.
            keep_checkpoint_max=10,  # Retain the 10 most recent checkpoints.
        )
        estimator = tf.estimator.DNNClassifier(
            hidden_units=self.hidden_units_size,
            feature_columns=[embedded_text_feature_column],
            n_classes=len(np.unique(self.data_train[self.labelKey])),
            optimizer=tf.train.AdagradOptimizer(
                learning_rate=self.learning_rate),
            dropout=0.25)
        #model_dir=self.export_dir_base, config=my_checkpointing_config)

        # Train briefly for demonstration purposes; increase steps for a real run.
        classifier = estimator.train(input_fn=train_input_fn, steps=100)

        # Save the training model
        #print('Exporting trained model to', self.export_dir_base)
        #classifier.export_savedmodel(export_dir_base=self.export_dir_base,
        #                               serving_input_receiver_fn=self.serving_input_receiver_fn, as_text=False)

        #
        train_eval_result = estimator.evaluate(input_fn=predict_train_input_fn)
        # test_eval_result = estimator.evaluate(input_fn=predict_test_input_fn)
        #
        # print "DNN Training set accuracy: {accuracy}".format(**train_eval_result)
        # print "DNN Test set accuracy: {accuracy}".format(**test_eval_result)
        return estimator
Example #28
def build_estimator(config, hidden_units, learning_rate, dropout, optimizer,
                    hub_module, train_hub):
    hub_column = hub.text_embedding_column(key="sentence",
                                           module_spec=hub_module,
                                           trainable=train_hub)
    return tf.estimator.DNNClassifier(config=config,
                                      feature_columns=[hub_column],
                                      hidden_units=hidden_units,
                                      optimizer=get_optimizer(
                                          optimizer, learning_rate),
                                      dropout=dropout)
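build_estimator delegates to a get_optimizer helper that is not shown; a hypothetical implementation dispatching on a short name:

def get_optimizer(name, learning_rate):
    # Hypothetical mapping from optimizer name to TF1 optimizer class.
    optimizers = {
        "adagrad": tf.train.AdagradOptimizer,
        "adam": tf.train.AdamOptimizer,
        "sgd": tf.train.GradientDescentOptimizer,
    }
    return optimizers[name](learning_rate=learning_rate)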
Example #29
def RunClassificationModel(score, module, train_module=False):
    # We only care about errors
    tf.logging.set_verbosity(tf.logging.ERROR)

    # Set up our data frames
    train_df, test_df, test = load_facebook_data(score)
    train_df.head()

    # Train model
    train_input_fn = tf.estimator.inputs.pandas_input_fn(train_df,
                                                         train_df[score],
                                                         num_epochs=None,
                                                         shuffle=True)
    predict_train_input_fn = tf.estimator.inputs.pandas_input_fn(
        train_df, train_df[score], shuffle=False)
    predict_test_input_fn = tf.estimator.inputs.pandas_input_fn(test_df,
                                                                test_df[score],
                                                                shuffle=False)

    # Set up our feature columns from the model
    embedded_text_feature_column = hub.text_embedding_column(
        key="status",
        module_spec=module,
        trainable=train_module,
    )

    # Set up our neural network
    estimator = tf.estimator.DNNClassifier(
        hidden_units=[500, 100],
        feature_columns=[embedded_text_feature_column],
        n_classes=6,
        optimizer=tf.train.AdagradOptimizer(learning_rate=0.003, ))

    # Train our neural network
    estimator.train(input_fn=train_input_fn, steps=10)

    # Get Results
    train_eval_result = estimator.evaluate(input_fn=predict_train_input_fn)
    test_eval_result = estimator.evaluate(input_fn=predict_test_input_fn)
    test_predictions = estimator.predict(input_fn=predict_test_input_fn)

    train_accuracy = train_eval_result["accuracy"] * 100.
    test_accuracy = test_eval_result["accuracy"] * 100.

    # Print results
    print "Results"
    print "================"
    print "Model: " + module
    print "Score: " + score
    print "Train Module: " + str(train_module)
    print "================"
    print "Training set accuracy: {0:.2f}%".format(train_accuracy)
    print "Test set accuracy: {0:.2f}%".format(test_accuracy)
    print ""
Example #30
    def build_dnn_classifier(self, hidden_vec, lrate):
        embedded_text_feature_column = tfhub.text_embedding_column(
            key=self.text_column_label,
            module_spec=self.encoder_module,
            trainable=True)

        return tf.estimator.DNNClassifier(
            model_dir="models/v1",
            hidden_units=hidden_vec,
            feature_columns=[embedded_text_feature_column],
            n_classes=self.n_classes,
            optimizer=tf.train.AdagradOptimizer(learning_rate=lrate))
Example #31
    def create_feature_columns(args):
        # Create content_id feature column
        content_id_column = tf.feature_column.categorical_column_with_hash_bucket(
            key = "content_id",
            hash_bucket_size = number_of_content_ids)

        # Embed content id into a lower dimensional representation
        embedded_content_column = tf.feature_column.embedding_column(
            categorical_column = content_id_column,
            dimension = args['content_id_embedding_dimensions'])

        # Create category feature column
        categorical_category_column = tf.feature_column.categorical_column_with_vocabulary_file(
            key = "category",
            vocabulary_file = tf.gfile.Glob(filename = "gs://{}/hybrid_recommendation/preproc/vocabs/category_vocab.txt*".format(args['bucket']))[0],
            num_oov_buckets = 1)

        # Convert categorical category column into indicator column so that it can be used in a DNN
        indicator_category_column = tf.feature_column.indicator_column(categorical_column = categorical_category_column)

        # Create title feature column using TF Hub
        embedded_title_column = hub.text_embedding_column(
            key = "title", 
            module_spec = "https://tfhub.dev/google/nnlm-de-dim50-with-normalization/1",
            trainable = False)

        # Create author feature column
        author_column = tf.feature_column.categorical_column_with_hash_bucket(
            key = "author",
            hash_bucket_size = number_of_authors + 1)

        # Embed author into a lower dimensional representation
        embedded_author_column = tf.feature_column.embedding_column(
            categorical_column = author_column,
            dimension = args['author_embedding_dimensions'])

        # Create months since epoch boundaries list for our binning
        months_since_epoch_boundaries = list(range(400, 700, 20))

        # Create months_since_epoch feature column using raw data
        months_since_epoch_column = tf.feature_column.numeric_column(
            key = "months_since_epoch")

        # Create bucketized months_since_epoch feature column using our boundaries
        months_since_epoch_bucketized = tf.feature_column.bucketized_column(
            source_column = months_since_epoch_column,
            boundaries = months_since_epoch_boundaries)

        # Cross our categorical category column and bucketized months since epoch column
        crossed_months_since_category_column = tf.feature_column.crossed_column(
            keys = [categorical_category_column, months_since_epoch_bucketized],
            hash_bucket_size = len(months_since_epoch_boundaries) * (number_of_categories + 1))

        # Convert crossed categorical category and bucketized months since epoch column into indicator column so that it can be used in a DNN
        indicator_crossed_months_since_category_column = tf.feature_column.indicator_column(categorical_column = crossed_months_since_category_column)

        # Create user and item factor feature columns from our trained WALS model
        user_factors = [tf.feature_column.numeric_column(key = "user_factor_" + str(i)) for i in range(10)]
        item_factors =  [tf.feature_column.numeric_column(key = "item_factor_" + str(i)) for i in range(10)]

        # Create list of feature columns
        feature_columns = [embedded_content_column,
                           embedded_author_column,
                           indicator_category_column,
                           embedded_title_column,
                           indicator_crossed_months_since_category_column] + user_factors + item_factors

        return feature_columns