Example #1
@contextlib.contextmanager
def _remove_metrics(estimator: tf.estimator.Estimator,
                    metrics_to_remove: Union[List[Text], Callable[[Text], bool]]):
    """Modifies the Estimator to make its model_fn return less metrics in EVAL.

  Note that this only removes the metrics from the
  EstimatorSpec.eval_metric_ops. It does not remove them from the graph or
  undo any side-effects that they might have had (e.g. modifications to
  METRIC_VARIABLES collections).

  This is useful for when you use py_func, streaming metrics, or other metrics
  incompatible with TFMA in your trainer. To keep these metrics in your trainer
  (so they still show up in Tensorboard) and still use TFMA, you can call
  remove_metrics on your Estimator before calling export_eval_savedmodel.

  This is a context manager, so it can be used like:
    with _remove_metrics(estimator, ['streaming_auc']):
      tfma.export.export_eval_savedmodel(estimator, ...)

  Args:
    estimator: tf.estimator.Estimator to modify. Will be mutated in place.
    metrics_to_remove: List of names of metrics to remove.

  Yields:
    Nothing.
  """
    old_call_model_fn = estimator._call_model_fn  # pylint: disable=protected-access

    def wrapped_call_model_fn(unused_self, features, labels, mode, config):
        result = old_call_model_fn(features, labels, mode, config)
        if mode == tf.estimator.ModeKeys.EVAL:
            filtered_eval_metric_ops = {}
            for k, v in result.eval_metric_ops.items():
                if isinstance(metrics_to_remove, collections.abc.Iterable):
                    if k in metrics_to_remove:
                        continue
                elif callable(metrics_to_remove):
                    if metrics_to_remove(k):
                        continue
                filtered_eval_metric_ops[k] = v
            result = result._replace(eval_metric_ops=filtered_eval_metric_ops)
        return result

    estimator._call_model_fn = types.MethodType(  # pylint: disable=protected-access
        wrapped_call_model_fn, estimator)

    yield

    estimator._call_model_fn = old_call_model_fn  # pylint: disable=protected-access
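A minimal usage sketch (the export path and receiver fn below are placeholders, not from the original project):

# Hypothetical usage: hide py_func-backed metrics from TFMA only for the export.
with _remove_metrics(estimator, ['streaming_auc']):
    tfma.export.export_eval_savedmodel(
        estimator=estimator,
        export_dir_base='/tmp/eval_model',  # placeholder path
        eval_input_receiver_fn=eval_input_receiver_fn)
# A callable also works, e.g. lambda name: name.startswith('streaming_').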
Example #2
def get_predictions(model: tf.estimator.Estimator, ds: tf.data.Dataset):
    '''Retrieve predictions from model.'''
    preds = model.predict(train_fn(ds, shuffle=False, repeat=1))
    preds = list(preds)
    # np.vstack/np.hstack expect a sequence; wrap the generators in lists.
    probabilities = np.vstack([pred["probabilities"] for pred in preds])
    class_ids = np.hstack([pred["class_ids"] for pred in preds])
    return probabilities, class_ids
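train_fn is a project-specific helper; a plausible sketch of what it might look like (an assumption, not the project's actual code) is:

def train_fn(ds, shuffle=True, repeat=None, batch_size=128):  # hypothetical helper
    def input_fn():
        d = ds.shuffle(10000) if shuffle else ds
        return d.repeat(repeat).batch(batch_size)
    return input_fn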
Example #3
def get_final_predictions(in_contexts, in_last_sentences, tokenizer,
                          estimator: tf.estimator.Estimator, label_list):
    """
    Return the log probabilities based on the story context and the proposed endings.

    Parameters
    ----------
    in_contexts:            iterable of story contexts
    in_last_sentences:      iterable of proposed last sentences
    tokenizer:              bert tokenizer
    estimator:              tf.estimator
    label_list:             possible values
    """
    input_examples = [
        run_classifier.InputExample(guid="", text_a=x, text_b=y, label=0)
        for x, y in zip(in_contexts, in_last_sentences)
    ]  # guid="" and label=0 are just dummy values here
    input_features = run_classifier.convert_examples_to_features(
        input_examples, label_list, flags.max_seq_length, tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(
        features=input_features,
        seq_length=flags.max_seq_length,
        is_training=False,
        drop_remainder=False)
    predictions = estimator.predict(predict_input_fn)
    predictions = [prediction['probabilities'] for prediction in predictions]

    return predictions
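A hypothetical call for a two-ending setup, picking the likelier ending per story (all variable names are assumptions):

probs = get_final_predictions(contexts, candidate_endings, tokenizer,
                              estimator, label_list=[0, 1])
chosen = [int(np.argmax(p)) for p in probs]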
Example #4
    def fit_model_on_fold(self, compiled_model: tf.estimator.Estimator, curr_fold_indices,
                          train_sequences, test_sequences):
        """
        trains compiled (but previously unfitted) model against given indices
        :param compiled_model:
        :param curr_fold_indices:
        :param train_sequences:
        :param test_sequences:
        :return:
        """
        def train_input_fn(features, labels, batch_size):
            dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
            dataset = dataset.shuffle(10000).repeat().batch(batch_size)
            return dataset

        def eval_input_fn(features, labels, batch_size):
            """use for both validation and prediction"""
            features = dict(features)
            if labels is None:
                inputs = features
            else:
                inputs = (features, labels)

            dataset = tf.data.Dataset.from_tensor_slices(inputs)
            return dataset.batch(batch_size)

        train_indices, val_indices = curr_fold_indices
        x_train = {'sequence': train_sequences[train_indices]}
        y_train = self.raw_train_df[self.target_cols].iloc[train_indices].values

        x_val = {'sequence': train_sequences[val_indices]}
        y_val = self.raw_train_df[self.target_cols].iloc[val_indices].values

        compiled_model.train(input_fn=lambda: train_input_fn(x_train, y_train, self.batch_size),
                             steps=self.epochs * len(train_indices) // self.batch_size,)
        lambda_input_fn = lambda: eval_input_fn(x_val, None, self.batch_size)
        val_predictions = compiled_model.predict(lambda_input_fn)
        val_prob = np.array([x['probabilities'] for x in val_predictions])
        val_roc_auc_score = roc_auc_score(y_val, val_prob)
        print('ROC-AUC val score: {0:.4f}'.format(val_roc_auc_score))

        x_test = {'sequence': test_sequences}
        lambda_input_fn = lambda: eval_input_fn(x_test, None, self.batch_size)
        test_predictions = compiled_model.predict(input_fn=lambda_input_fn)
        test_prob = np.array([x['probabilities'] for x in test_predictions])

        return val_roc_auc_score, test_prob
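A sketch of a driver loop for this method, assuming scikit-learn's KFold and a hypothetical build_model factory that returns a fresh, unfitted estimator per fold:

from sklearn.model_selection import KFold

scores, test_probs = [], []
for fold_indices in KFold(n_splits=5, shuffle=True).split(train_sequences):
    model = self.build_model()  # hypothetical factory
    score, test_prob = self.fit_model_on_fold(model, fold_indices,
                                              train_sequences, test_sequences)
    scores.append(score)
    test_probs.append(test_prob)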
Example #5
 def _export_model(self, estimator: tf.estimator.Estimator,
                   save_location: str) -> None:
     """
     Used to export your model in a format that can be used with
     Tf.Serving
     :param estimator: your estimator function
     """
     # this should match the input shape of your model
     # TODO: update this to your input used in prediction/serving
     x1 = tf.feature_column.numeric_column("input", shape=[480, 640, 1])
     # create a list in case you have more than one input
     feature_columns = [x1]
     feature_spec = tf.feature_column.make_parse_example_spec(
         feature_columns)
     export_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
         feature_spec)
     # export the saved model
     estimator.export_savedmodel(save_location, export_input_fn)
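Because this uses a parsing receiver, the exported model expects serialized tf.Example protos at serving time. A rough TF 1.x sketch (export_dir and image are placeholders):

from tensorflow.contrib import predictor  # TF 1.x only

predict_fn = predictor.from_saved_model(export_dir)
example = tf.train.Example(features=tf.train.Features(feature={
    "input": tf.train.Feature(
        float_list=tf.train.FloatList(value=image.ravel()))
}))
print(predict_fn({"examples": [example.SerializeToString()]}))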
Example #6
 def _predict(self, estimator: tf.estimator.Estimator,
              pred_fn: Callable) -> list:
     """
     Function to yield prediction results from the model
     :param estimator: your estimator function
     :param pred_fn: input_fn associated with prediction dataset
     :return: a list containing a prediction for each batch in the dataset
     """
     return list(estimator.predict(input_fn=pred_fn))
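A hypothetical pred_fn for an in-memory array, using the TF 1.x numpy input helper (the feature key and x_new are assumptions):

pred_fn = tf.estimator.inputs.numpy_input_fn(
    x={"input": x_new}, shuffle=False)
predictions = self._predict(estimator, pred_fn)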
Example #7
 def export(self, estimator: tf.estimator.Estimator):
     for variable_name in self.variable_names:
         variable_export_dir = Path(self.path_variables, variable_name)
         LOGGER.info(
             f"Saving variable {variable_name} to {variable_export_dir}")
         with ParquetDataset(variable_export_dir).open() as ds:
             variable_value = estimator.get_variable_value(variable_name)
             ds.write_pandas(pd.DataFrame(variable_value),
                             compression=self.compression,
                             chunk_size=self.chunk_size)
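The exported values can be read back later with pandas, assuming a pyarrow-backed environment (ParquetDataset itself is the project's own wrapper):

import pandas as pd
df = pd.read_parquet(variable_export_dir)  # the per-variable path built above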
Example #8
    def export(self, estimator: tf.estimator.Estimator):
        # Reload summaries and select best step
        LOGGER.info(f"Reloading summaries from {estimator.model_dir}")
        summaries = read_eval_metrics(estimator.eval_dir()).items()
        for step, metrics in sorted(summaries):
            LOGGER.info(f"- {step}: {metrics}")
        sorted_summaries = sorted(summaries, key=lambda t: t[1][self.metric])
        if self.mode == BestMode.INCREASE:
            best_step, best_metrics = sorted_summaries[-1]
        elif self.mode == BestMode.DECREASE:
            best_step, best_metrics = sorted_summaries[0]
        else:
            raise ValueError(f"Mode {self.mode} not recognized.")
        LOGGER.info(f"Best summary at step {best_step}: {best_metrics}")

        # List available checkpoints and select the one closest to best_step
        checkpoints = Path(estimator.model_dir).glob(_CHEKPOINT_PATTERN)
        checkpoint_steps = [
            int(re.findall(r"-(\d+).index", str(path))[0])
            for path in checkpoints
        ]
        selected_step = sorted(checkpoint_steps,
                               key=lambda step: abs(step - best_step))[0]
        LOGGER.info(f"Selected checkpoint {selected_step}")

        # Change checkpoint information
        with Path(estimator.model_dir, "checkpoint").open("r") as file:
            lines = file.read().split("\n")
            lines[0] = f'model_checkpoint_path: "model.ckpt-{selected_step}"'

        with Path(estimator.model_dir, "checkpoint").open("w") as file:
            file.write("\n".join(lines))

        # Check that change is effective
        global_step = estimator.get_variable_value("global_step")
        if global_step != selected_step:
            msg = f"Changed checkpoint file to use step {selected_step}, but estimator uses {global_step}"
            raise ValueError(msg)

        # Log to MLFlow
        if self.use_mlflow:
            mlflow.log_metric(key=self.tag, value=global_step)
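For reference, the rewritten `checkpoint` file is a text-format CheckpointState proto; its first line is what the estimator reads, roughly:

model_checkpoint_path: "model.ckpt-12000"
all_model_checkpoint_paths: "model.ckpt-11000"
all_model_checkpoint_paths: "model.ckpt-12000"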
Example #9
 def _export_model(self, estimator: tf.estimator.Estimator,
                   save_location: str) -> None:
     """
     Used to export your model in a format that can be used with
     Tf.Serving
     :param estimator: your estimator function
     """
     # this should match the input shape of your model
     x1 = tf.feature_column.numeric_column(
         "input",
         shape=[
             self.config["train_batch_size"], conf.c_maxnum, conf.c_maxlen
         ])
     # create a list in case you have more than one input;
     # make_parse_example_spec expects an iterable of feature columns
     feature_columns = [x1]
     feature_spec = tf.feature_column.make_parse_example_spec(
         feature_columns)
     export_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
         feature_spec)
     # export the saved model
     estimator.export_savedmodel(save_location, export_input_fn)
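For reference, make_parse_example_spec turns the feature columns into a parsing dict, roughly (shapes echo this example's config):

# {"input": tf.io.FixedLenFeature(
#      shape=[train_batch_size, c_maxnum, c_maxlen], dtype=tf.float32)}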
Example #10
def evaluate(model: tf.estimator.Estimator,
             features: pd.DataFrame,
             labels: pd.DataFrame,
             steps: int = None):
    '''Check the mse on the validation set.'''

    ds = make_dataset(features, labels)

    results = model.evaluate(train_fn(ds, shuffle=False, repeat=1),
                             steps=steps)

    for stat_name, stat_value in results.items():
        print(f"{stat_name:>20} | {stat_value}")

    return results
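A hypothetical call on a held-out split; Estimator.evaluate returns a metrics dict such as {"average_loss": ..., "loss": ..., "global_step": ...}:

val_results = evaluate(model, val_features, val_labels)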
Example #11
File: train.py  Project: xixici/Alink
def train_estimator(estimator: tf.estimator.Estimator, input_config,
                    train_config, export_config, task_config: TrainTaskConfig):
    example_config = input_config['example_config']
    label_col = input_config['label_col']

    feature_specs = parse_feature_specs(example_config)
    dataset_fn = get_dataset_fn(feature_specs=feature_specs,
                                label_col=label_col,
                                **train_config)
    train_spec = tf.estimator.TrainSpec(dataset_fn)
    eval_spec = tf.estimator.EvalSpec(dataset_fn, steps=1)
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

    feature_placeholders = get_feature_placeholders(**export_config)
    serving_input_receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
        feature_placeholders)

    if (task_config.task_type == 'chief' and task_config.task_index == 0) or \
            (task_config.num_workers == 1):
        logging.info("Start exporting...")
        estimator.export_saved_model(
            task_config.saved_model_dir,
            serving_input_receiver_fn=serving_input_receiver_fn)
        logging.info("Finish exporting.")
Example #12
 def __call__(
         self,
         estimator: tf.estimator.Estimator) -> tf.estimator.SessionRunHook:
     if estimator.config.is_chief:
         return _StopOnPredicateHook(
             partial(
                 _no_metric_improvement_fn,
                 eval_dir=estimator.eval_dir(),
                 min_steps=self.min_steps,
                 metric=self.metric,
                 max_steps_without_improvement=self.max_steps_without_improvement,
                 mode=self.mode,
             ),
             run_every_secs=self.run_every_secs,
             run_every_steps=self.run_every_steps,
             final_step=self.final_step,
         )
     else:
         return _CheckForStoppingHook()
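A hypothetical wiring of this factory into training; TrainSpec accepts the returned hook (early_stopping and the input fns are placeholders):

hook = early_stopping(estimator)  # hypothetical instance of this class
train_spec = tf.estimator.TrainSpec(train_input_fn, hooks=[hook])
eval_spec = tf.estimator.EvalSpec(eval_input_fn)
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)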
Example #13
File: predict.py  Project: ai-med/almgig
    def _get_predictions(self,
                         estimator: tf.estimator.Estimator,
                         eval_fn: Callable[[], Dict[str, tf.Tensor]]) -> MoleculePredictions:
        collect_edges = CollectTensorHook('adjacency_in:0')
        collect_nodes = CollectTensorHook('features:0')

        predictions = estimator.predict(eval_fn, hooks=[collect_edges, collect_nodes])
        pred = collect_predictions(predictions)

        feat = np.stack(pred['reconstructed/features'], axis=0)
        adj = np.stack(pred['reconstructed/adjacency'], axis=0)

        feat, adj = onehot_to_dense(feat, adj)

        mols_recon = MoleculeGraph(nodes=feat, edges=adj)

        mols_real = MoleculeGraph(
            nodes=np.row_stack(collect_nodes.data),
            edges=np.row_stack(collect_edges.data))

        return MoleculePredictions(inputs=mols_real,
                                   embeddings=np.row_stack(pred['embedding']),
                                   reconstructions=mols_recon)
Example #14
 def export(self, estimator: tf.estimator.Estimator):
     features = {field.name: field.as_placeholder(batch=True) for field in self.fields}
     return estimator.export_saved_model(
         self.path_saved_model, tf.estimator.export.build_raw_serving_input_receiver_fn(features)
     )
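A hypothetical usage; export_saved_model returns the path (as bytes) of the timestamped export directory:

export_dir = exporter.export(estimator)  # `exporter`: an instance of this class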