def _evaluate_language(language_df, model, feature_counts):
    """Predicts and/or scores the features of a single language.

    The behaviour depends on `FLAGS.prediction_mode`:
      * Prediction mode: features whose value passes `_value_is_valid` are
        copied through unchanged; every other feature is filled in with the
        first value returned by the model.
      * Evaluation mode: features whose value fails `_value_is_valid` are
        skipped; every other feature is predicted and compared against the
        value already present.

    Args:
      language_df: (pandas) Dataframe representing a single language.
      model: (object) Model to evaluate the language with. Its
        `predict(language_df, context_features, feature)` result is indexed
        at position 0 to obtain the single best value.
      feature_counts: (dict) Per-feature counters with "correct" and "total"
        entries. Updated in place; missing features are initialised to zero.

    Returns:
      A triple `(num_evals, num_correct, predictions)`. `predictions` always
      starts with the first `sigtyp.NUM_COLUMNS - 1` columns of `language_df`;
      in prediction mode a final "|"-joined string of `feature=value` entries
      is appended to it.
    """
    total = 0
    correct = 0
    language_features = sigtyp.get_feature_values(language_df)

    # Carry over every column except the last one.
    predictions = []
    for column in range(sigtyp.NUM_COLUMNS - 1):
        predictions.append(language_df[column])

    filled_in = []
    for feature, value in language_features:
        predicting = FLAGS.prediction_mode
        has_valid_value = _value_is_valid(value)
        if predicting:
            if has_valid_value:
                # Already known: pass the value through and skip the model.
                filled_in.append("%s=%s" % (feature, value))
                continue
        elif not has_valid_value:
            # Evaluation mode has nothing to compare an empty feature against.
            continue

        if feature not in feature_counts:
            feature_counts[feature] = {"correct": 0, "total": 0}

        # An unknown value means there is no truth to score against ("pure"
        # prediction).
        truth_missing = value == const.UNKNOWN_FEATURE_VALUE
        context = _prepare_context_features(language_features, feature)
        nbest = model.predict(language_df, context, feature)
        best_guess = nbest[0]
        if predicting:
            filled_in.append("%s=%s" % (feature, best_guess))

        if not truth_missing:
            if best_guess == value:  # Correct prediction.
                correct += 1
                feature_counts[feature]["correct"] += 1

        # Everything is counted in prediction mode; otherwise only features
        # that have a truth value.
        if predicting or not truth_missing:
            total += 1
            feature_counts[feature]["total"] += 1

    if FLAGS.prediction_mode:
        predictions.append("|".join(filled_in))
    return total, correct, predictions
def _features_to_predict(test_df):
    """Collects the names of the features to evaluate or predict.

    A feature is selected if, for at least one row of `test_df`, its value
    equals `const.UNKNOWN_FEATURE_VALUE` (prediction mode) or passes
    `_value_is_valid` (evaluation mode).

    Args:
      test_df: (pandas) Dataframe iterated row by row with `iterrows()`.

    Returns:
      A list of the unique selected feature names. It is built from a set, so
      its order carries no meaning.
    """
    selected = set()
    for _, language_row in test_df.iterrows():
        for name, value in sigtyp.get_feature_values(language_row):
            if FLAGS.prediction_mode:
                # Only the explicitly unknown values need to be filled in.
                wanted = value == const.UNKNOWN_FEATURE_VALUE
            else:
                # Evaluation needs an existing value to compare against.
                wanted = _value_is_valid(value)
            if wanted:
                selected.add(name)
    logging.info("====> %d features to evaluate/predict", len(selected))
    return list(selected)