def get_predictions(model: tf.estimator.Estimator, ds: tf.data.Dataset):
    """Retrieve class probabilities and predicted class ids from ``model``.

    ``ds`` is wrapped by ``train_fn(ds, shuffle=False, repeat=1)`` and the
    result is handed to ``model.predict``.

    Parameters
    ----------
    model: estimator whose ``predict`` yields mappings with
        ``"probabilities"`` and ``"class_ids"`` entries
    ds: dataset to run prediction on

    Returns
    -------
    tuple ``(probabilities, class_ids)`` where ``probabilities`` is the
    ``np.vstack`` of every ``"probabilities"`` entry and ``class_ids`` is
    the ``np.hstack`` of every ``"class_ids"`` entry, in prediction order.

    Raises
    ------
    ValueError
        Raised by NumPy when the model yields no predictions at all
        (an empty list cannot be stacked).
    """
    # Materialise the predictions once: they are traversed twice below.
    preds = list(model.predict(train_fn(ds, shuffle=False, repeat=1)))

    # NumPy's stacking helpers require a real sequence (list/tuple); feeding
    # them a generator expression is deprecated since NumPy 1.16 and rejected
    # by newer releases, hence the explicit list comprehensions.
    probabilities = np.vstack([pred["probabilities"] for pred in preds])
    class_ids = np.hstack([pred["class_ids"] for pred in preds])
    return probabilities, class_ids
def get_final_predictions(in_contexts, in_last_sentences, tokenizer, estimator: tf.estimator.Estimator, label_list):
    """
    Score proposed story endings against their story contexts.

    Contexts and endings are paired element-wise (``zip``), turned into
    ``run_classifier.InputExample`` objects, converted to features with
    ``flags.max_seq_length`` and fed to ``estimator.predict``.

    Parameters
    ----------
    in_contexts: iterable of story contexts (used as ``text_a``)
    in_last_sentences: iterable of proposed last sentences (used as
        ``text_b``); paired with ``in_contexts`` by position, so any surplus
        items in the longer iterable are ignored
    tokenizer: bert tokenizer passed to ``convert_examples_to_features``
    estimator: tf.estimator used for prediction
    label_list: possible label values

    Returns
    -------
    list with the ``'probabilities'`` entry of each prediction, in order.
    NOTE(review): whether these are probabilities or log probabilities
    depends on the estimator's model_fn -- confirm.
    """
    examples = []
    for context, ending in zip(in_contexts, in_last_sentences):
        # guid is left empty and label=0 is only a placeholder value.
        # NOTE(review): presumably the label is not used at prediction time,
        # but it likely has to be a member of label_list -- confirm.
        examples.append(
            run_classifier.InputExample(guid="", text_a=context, text_b=ending, label=0)
        )

    seq_length = flags.max_seq_length
    features = run_classifier.convert_examples_to_features(
        examples, label_list, seq_length, tokenizer
    )
    input_fn = run_classifier.input_fn_builder(
        features=features,
        seq_length=seq_length,
        is_training=False,
        drop_remainder=False,
    )
    return [output['probabilities'] for output in estimator.predict(input_fn)]
def fit_model_on_fold(self, compiled_model: tf.estimator.Estimator, curr_fold_indices, train_sequences, test_sequences):
    """
    Train the estimator on one fold, score the held-out part of the fold
    with ROC-AUC, and predict the test set.

    :param compiled_model: estimator to train; it is trained in place via ``train``
    :param curr_fold_indices: pair ``(train_indices, val_indices)`` used to index
        ``train_sequences`` and the rows of ``self.raw_train_df``
    :param train_sequences: training sequences, indexable by the fold indices
    :param test_sequences: test sequences to predict after training
    :return: tuple ``(val_roc_auc_score, test_prob)`` -- the validation ROC-AUC
        and an array of the ``'probabilities'`` predicted for the test set
    """

    def make_dataset(features, labels=None):
        """Slice features (and labels, when given) into an unbatched dataset."""
        features = dict(features)
        tensors = features if labels is None else (features, labels)
        return tf.data.Dataset.from_tensor_slices(tensors)

    def train_input_fn():
        """Shuffled, endlessly repeating, batched training data."""
        dataset = make_dataset(x_train, y_train)
        return dataset.shuffle(10000).repeat().batch(self.batch_size)

    def predict_probabilities(features):
        """Predict on label-free features and stack the 'probabilities' outputs."""

        def input_fn():
            return make_dataset(features).batch(self.batch_size)

        outputs = compiled_model.predict(input_fn=input_fn)
        return np.array([output['probabilities'] for output in outputs])

    train_indices, val_indices = curr_fold_indices
    targets = self.raw_train_df[self.target_cols]

    x_train = {'sequence': train_sequences[train_indices]}
    y_train = targets.iloc[train_indices].values
    x_val = {'sequence': train_sequences[val_indices]}
    y_val = targets.iloc[val_indices].values

    # The training input repeats forever, so the step budget is what ends training.
    n_steps = self.epochs * len(train_indices) // self.batch_size
    compiled_model.train(input_fn=train_input_fn, steps=n_steps)

    val_prob = predict_probabilities(x_val)
    val_roc_auc_score = roc_auc_score(y_val, val_prob)
    print('ROC-AUC val score: {0:.4f}'.format(val_roc_auc_score))

    test_prob = predict_probabilities({'sequence': test_sequences})
    return val_roc_auc_score, test_prob
def _predict(self, estimator: tf.estimator.Estimator, pred_fn: Callable) -> list:
    """
    Run ``estimator.predict`` on ``pred_fn`` and collect everything it yields.

    :param estimator: estimator used to produce the predictions
    :param pred_fn: input_fn associated with the prediction dataset
    :return: a list with every item produced by ``estimator.predict``, in order.
        NOTE(review): with TensorFlow's default ``yield_single_examples=True``
        this is presumably one item per example rather than per batch -- confirm.
    """
    predictions = estimator.predict(input_fn=pred_fn)
    # Exhaust the prediction iterable so callers get a concrete list.
    return [*predictions]
def _get_predictions(self, estimator: tf.estimator.Estimator, eval_fn: Callable[[], Dict[str, tf.Tensor]]) -> MoleculePredictions:
    """
    Run prediction and bundle inputs, embeddings and reconstructions.

    Two ``CollectTensorHook`` instances (for ``'adjacency_in:0'`` and
    ``'features:0'``) are attached to ``estimator.predict`` so the input
    graphs can be returned next to the model outputs.

    :param estimator: estimator used for prediction
    :param eval_fn: input function handed to ``estimator.predict``
    :return: ``MoleculePredictions`` holding the collected input graphs
        (``inputs``), the stacked ``'embedding'`` outputs (``embeddings``)
        and the graphs rebuilt from ``'reconstructed/features'`` and
        ``'reconstructed/adjacency'`` (``reconstructions``)
    """
    collect_edges = CollectTensorHook('adjacency_in:0')
    collect_nodes = CollectTensorHook('features:0')

    predictions = estimator.predict(eval_fn, hooks=[collect_edges, collect_nodes])
    # Keep this call ahead of any access to the hooks' ``data``.
    # NOTE(review): presumably collect_predictions drains the prediction
    # iterator, which is what lets the hooks gather their tensors -- confirm.
    pred = collect_predictions(predictions)

    feat = np.stack(pred['reconstructed/features'], axis=0)
    adj = np.stack(pred['reconstructed/adjacency'], axis=0)
    feat, adj = onehot_to_dense(feat, adj)
    mols_recon = MoleculeGraph(nodes=feat, edges=adj)

    # np.vstack is used instead of np.row_stack: row_stack is merely an alias
    # of vstack and is deprecated in NumPy 2.0, so results are unchanged.
    mols_real = MoleculeGraph(
        nodes=np.vstack(collect_nodes.data),
        edges=np.vstack(collect_edges.data),
    )

    return MoleculePredictions(
        inputs=mols_real,
        embeddings=np.vstack(pred['embedding']),
        reconstructions=mols_recon,
    )