def collect_results(self, results: List[Dict]) -> None:
    """Store the alignment matrix fetched by the first session as output."""
    alignments = results[0]["alignment"]
    self.result = ExecutionResult(
        outputs=alignments,
        losses=[],
        scalar_summaries=None,
        histogram_summaries=None,
        image_summaries=None)
def collect_results(self, results: List[Dict]) -> None:
    """Format per-step logits of a single session into tab-joined strings.

    Expects exactly one execution result. The fetched ``logits`` tensor is
    time-major ``(time, batch, vocab)``; the output is one string per
    sentence with one (comma-joined or picked) value per time step.
    """
    if len(results) != 1:
        raise ValueError('LogitsRunner needs exactly 1 execution result, '
                         'got {}'.format(len(results)))
    session_out = results[0]

    def format_logits(logits) -> str:
        """Render one time step of one sentence as a string."""
        if self._normalize:
            # Softmax over the vocabulary axis of this single distribution.
            exps = np.exp(logits)
            logits = exps / np.sum(exps, axis=0)
        if self._pick_index:
            return str(logits[self._pick_index])
        return ",".join(str(value) for value in logits)

    # time-major logits: (time, batch, vocab)
    time_major = session_out["logits"]

    # one growing list of step-strings per sentence in the batch
    per_sentence = [[] for _ in time_major[0]]  # type: List[List[str]]
    for step in time_major:
        for sentence_steps, logits in zip(per_sentence, step):
            sentence_steps.append(format_logits(logits))

    tabbed = [["\t".join(steps)] for steps in per_sentence]

    self.result = ExecutionResult(
        outputs=tabbed,
        losses=[session_out["train_loss"], session_out["runtime_loss"]],
        scalar_summaries=None,
        histogram_summaries=None,
        image_summaries=None)
def collect_results(self, results: List[Dict]) -> None:
    """Drive the delayed-update state machine over successive executions.

    State 0 (accumulating): remember the losses and, once the batch
    counter reaches ``batches_per_update``, advance to state 1.
    State 1: optionally remember summaries, advance to state 2.
    State 2 (final): build the ExecutionResult from the remembered values.
    Each call handles exactly one session result.
    """
    assert len(results) == 1
    result = results[0]

    if self.state == 0:  # ACCUMULATING
        # Keep the most recent losses for the final result.
        self.res_losses = result["losses"]

        # Are we updating?
        counter = result["counter"]
        if counter == self.trainer.batches_per_update:
            self.state = 1
        return
    elif self.state == 1:
        # Update step has run; grab its summaries if they were requested.
        if self.summaries:
            self.res_scal_sums = result["scalar_summaries"]
            self.res_hist_sums = result["histogram_summaries"]
        self.state = 2
        return

    # State 2: losses must have been stored during accumulation.
    assert self.res_losses is not None
    self._result = ExecutionResult(
        [], losses=self.res_losses,
        scalar_summaries=self.res_scal_sums,
        histogram_summaries=self.res_hist_sums,
        image_summaries=None)
def prepare_results(self, output):
    """Extract the hypothesis of the configured rank from beam output.

    Reorders the time-major token ids to (batch, beam, time), keeps the
    ranked hypothesis per sentence (dropping its initial symbol), cuts
    each sentence at END_TOKEN_INDEX and maps ids to words.
    """
    rank_idx = self._rank - 1
    hyp_scores = [step_scores[rank_idx] for step_scores in output.scores]

    # (time, batch, beam) -> (batch, beam, time); drop the start symbol.
    ids_by_batch = np.transpose(output.token_ids, [1, 2, 0])
    ranked_ids = [beam_ids[rank_idx][1:] for beam_ids in ids_by_batch]

    vocab = self._decoder.vocabulary
    sentences = []
    for id_seq in ranked_ids:
        words = []
        for tok_id in id_seq:
            if tok_id == END_TOKEN_INDEX:
                break
            words.append(vocab.index_to_word[tok_id])
        sentences.append(words)

    if self._postprocess is not None:
        sentences = self._postprocess(sentences)

    # TODO: provide better summaries in case (issue #599)
    # we want to use the runner during training.
    self._result = ExecutionResult(
        outputs=sentences,
        losses=[np.mean(hyp_scores) * len(hyp_scores)],
        scalar_summaries=None,
        histogram_summaries=None,
        image_summaries=None)
def collect_results(self, results: List[Dict]) -> None:
    """Ensemble decoded log-probs across sessions and decode to words.

    Cross-entropies are summed over sessions; per-step distributions are
    combined with log-sum-exp before taking the argmax.
    """
    num_steps = results[0]["decoded_logprobs"].shape[0]
    combined = [-np.inf for _ in range(num_steps)]
    total_train_xent = 0.
    total_runtime_xent = 0.

    for session_out in results:
        total_train_xent += session_out["train_xent"]
        total_runtime_xent += session_out["runtime_xent"]
        # Adding probabilities in log space merges the ensemble members.
        for step, step_logprobs in enumerate(session_out["decoded_logprobs"]):
            combined[step] = np.logaddexp(combined[step], step_logprobs)

    best_ids = [np.argmax(step_dist, axis=1) for step_dist in combined]
    sentences = self._vocabulary.vectors_to_sentences(best_ids)
    if self._postprocess is not None:
        sentences = self._postprocess(sentences)

    self.result = ExecutionResult(
        outputs=sentences,
        losses=[total_train_xent, total_runtime_xent],
        scalar_summaries=None,
        histogram_summaries=None,
        image_summaries=results[0].get("image_summaries"))
def collect_results(self, results: List[Dict]) -> None:
    """Ensemble label log-probs across sessions, mask padding, and decode.

    Log-probabilities are combined with log-sum-exp; losses are summed.
    Padding positions are forced back to END_TOKEN_INDEX via the
    mask-shift hack below before decoding to label strings.
    """
    loss = results[0].get("loss", 0.)
    summed_logprobs = results[0]["label_logprobs"]
    input_mask = results[0]["input_mask"]

    for sess_result in results[1:]:
        loss += sess_result.get("loss", 0.)
        summed_logprobs = np.logaddexp(summed_logprobs,
                                       sess_result["label_logprobs"])
        # FIX: `==` between numpy arrays is element-wise, so a bare assert
        # on it raises "truth value of an array is ambiguous" for any
        # multi-element mask. Compare the arrays explicitly instead.
        assert np.array_equal(input_mask, sess_result["input_mask"])

    argmaxes = np.argmax(summed_logprobs, axis=2)

    # CAUTION! FABULOUS HACK BELIEVE ME
    # Shift so that masked (padding) positions become END_TOKEN_INDEX:
    # after subtracting, multiplying by the 0/1 mask zeroes padding,
    # and adding the index back restores real positions unchanged.
    argmaxes -= END_TOKEN_INDEX
    argmaxes *= input_mask.astype(int)
    argmaxes += END_TOKEN_INDEX

    # must transpose argmaxes because vectors_to_sentences is time-major
    decoded_labels = self._vocabulary.vectors_to_sentences(argmaxes.T)

    if self._postprocess is not None:
        decoded_labels = self._postprocess(decoded_labels)

    self.result = ExecutionResult(
        outputs=decoded_labels,
        losses=[loss],
        scalar_summaries=None,
        histogram_summaries=None,
        image_summaries=None)
def collect_results(self, results: List[Dict]) -> None:
    """Average per-token perplexities and cross-entropies over sessions."""
    xent_stack = [res["xents"] for res in results]
    # Perplexity is 2**xent; average element-wise across sessions.
    perplexities = np.mean([2 ** xents for xents in xent_stack], axis=0)
    mean_xent = float(np.mean(xent_stack))
    self.result = ExecutionResult(
        outputs=perplexities.tolist(),
        losses=[mean_xent],
        scalar_summaries=None,
        histogram_summaries=None,
        image_summaries=None)
def collect_results(self, results: List[Dict]) -> None:
    """Output the encoded vectors computed by the selected session.

    Raises:
        ValueError: If the configured session index is out of range.
    """
    # FIX: `_used_session` is a zero-based index into `results`, so it
    # must be strictly smaller than len(results). The original `>` check
    # let `_used_session == len(results)` slip through to an IndexError.
    if self._used_session >= len(results):
        raise ValueError(
            ("Session id {} is higher than number of used "
             "TensorFlow session ({}).").format(self._used_session,
                                                len(results)))

    vectors = results[self._used_session]["encoded"]

    self.result = ExecutionResult(
        outputs=vectors,
        losses=[],
        scalar_summaries=None,
        histogram_summaries=None,
        image_summaries=None)
def collect_results(self, results: List[Dict]) -> None:
    """Store losses (and summaries, when requested) from a single session."""
    assert len(results) == 1
    session_out = results[0]

    scal_sums = None
    hist_sums = None
    if self.summaries:
        scal_sums = session_out["scalar_summaries"]
        hist_sums = session_out["histogram_summaries"]

    self._result = ExecutionResult(
        [],
        losses=session_out["losses"],
        scalar_summaries=scal_sums,
        histogram_summaries=hist_sums,
        image_summaries=None)
def collect_results(self, results: List[Dict]) -> None:
    """Decode the greedy output of a single session into sentences."""
    if len(results) != 1:
        raise ValueError("PlainRunner needs exactly 1 execution result, "
                         "got {}".format(len(results)))
    session_out = results[0]

    sentences = self._vocabulary.vectors_to_sentences(session_out["decoded"])
    if self._postprocess is not None:
        sentences = self._postprocess(sentences)

    self.result = ExecutionResult(
        outputs=sentences,
        losses=[session_out["train_loss"], session_out["runtime_loss"]],
        scalar_summaries=None,
        histogram_summaries=None,
        image_summaries=None)
def prepare_results(self):
    """Backtrack the ranked hypothesis for every sentence in the batch.

    Starting from the scores of the last decoded step, follows the
    parent pointers backwards to recover the token sequence, then cuts
    each sentence at END_TOKEN and drops padding tokens.
    """
    max_time = self._step
    vocab = self._decoder.vocabulary
    final_scores = self._scores[-1]

    # Hypothesis index of the requested rank for each sentence.
    hyp_indices = np.argpartition(
        -final_scores, self._rank - 1)[:, self._rank - 1]

    sentences = []
    hyp_scores = []
    for batch_idx, hyp_idx in enumerate(hyp_indices):
        hyp_scores.append(final_scores[batch_idx][hyp_idx])

        # Walk the parent pointers from the last step to the first.
        reversed_tokens = []
        for time in reversed(range(max_time)):
            token_id = self._token_ids[time][batch_idx][hyp_idx]
            reversed_tokens.append(vocab.index_to_word[token_id])
            hyp_idx = self._parent_ids[time][batch_idx][hyp_idx]

        # Restore forward order, stop at END_TOKEN, skip padding.
        sentence = []
        for tok in reversed(reversed_tokens):
            if tok == END_TOKEN:
                break
            # TODO: investigate why the decoder can start generating
            # padding before generating the END_TOKEN
            if tok != PAD_TOKEN:
                sentence.append(tok)
        sentences.append(sentence)

    if self._postprocess is not None:
        sentences = self._postprocess(sentences)

    # TODO: provide better summaries in case (issue #599)
    # we want to use the runner during training.
    self.result = ExecutionResult(
        outputs=sentences,
        losses=[np.mean(hyp_scores) * len(hyp_scores)],
        scalar_summaries=None,
        histogram_summaries=None,
        image_summaries=None)
def _prepare_results(self):
    """Build one BeamSearchOutputGraph per sentence from beam outputs.

    Decodes the collected token ids to words, reshapes the attention
    weights, transposes scores and parent ids to batch-major order
    (mutating the instance attributes in place), and stores the graphs
    in ``self.result``.
    """
    max_time = self._step
    batch_size = self._scores.shape[
        1]  # shape(_scores) = (time, batch, beam)

    # Decode token_ids into words from the vocabulary.
    decoded_tokens = []  # (batch, time, beam)
    for b in range(batch_size):
        batch = []
        for t in range(max_time):
            tok_dec = [
                self._decoder.vocabulary.index_to_word[tok]
                for tok in self._token_ids[t][b]
            ]
            batch.append(tok_dec)
        decoded_tokens.append(batch)

    # Prepare attention alignments.
    # First weight tensor is an abundance from the initial decoder call.
    # ^--- TODO(review): verify this claim (original note: Czech "Overit")
    att = self._attention_loop_states["weights"][1:]
    # TODO: Check that the reshaping doesn't mess up the order
    att = att.reshape((batch_size, max_time, self._decoder.beam_size, -1))

    # NOTE: these transposes mutate the instance state from time-major to
    # batch-major; the method must not run twice on the same state.
    self._scores = np.transpose(self._scores, axes=(1, 0, 2))
    self._parent_ids = np.transpose(self._parent_ids, axes=(1, 0, 2))

    result = []
    for i in range(batch_size):
        bs_graph = BeamSearchOutputGraph(self._scores[i], decoded_tokens[i],
                                         self._parent_ids[i], att[i],
                                         self._decoder.beam_size, max_time)
        result.append(bs_graph)

    self.result = ExecutionResult(outputs=result, losses=[],
                                  scalar_summaries=None,
                                  histogram_summaries=None,
                                  image_summaries=None)
    return
def collect_results(self, results: List[Dict]) -> None:
    """Average predictions over sessions and sum the available MSE losses."""
    total_mse = 0.
    prediction_total = np.zeros_like(results[0]["prediction"])
    for session_out in results:
        if "mse" in session_out:
            total_mse += session_out["mse"]
        prediction_total += session_out["prediction"]

    averaged = prediction_total / len(results)
    if self._postprocess is not None:
        averaged = self._postprocess(averaged)

    self.result = ExecutionResult(
        outputs=averaged.tolist(),
        losses=[total_mse],
        scalar_summaries=None,
        histogram_summaries=None,
        image_summaries=None)
def collect_results(self, results: List[Dict]) -> None:
    """Average the tracked losses over sessions; summaries from the first."""
    scal_sums = None
    hist_sums = None
    if self.scalar_summaries is not None:
        # TODO collect summaries from different sessions
        scal_sums = results[0]["scalar_summaries"]
        hist_sums = results[0]["histogram_summaries"]

    num_losses = len(self.losses)
    totals = [0.] * num_losses
    for session_out in results:
        for i in range(num_losses):
            # from the end, losses are last ones
            totals[i] += session_out["losses"][i]
    averaged = [total / len(results) for total in totals]

    self.result = ExecutionResult(
        [],
        losses=averaged,
        scalar_summaries=scal_sums,
        histogram_summaries=hist_sums,
        image_summaries=None)
def collect_results(self, results: List[Dict]) -> None:
    """Fetch tensor values, transposing to (batch, session) when ensembling.

    With several sessions and no session selected, each session yields a
    per-batch list of dicts; zipping the per-session lists together turns
    the (session, batch, tensor_name) structure into
    (batch, session, tensor_name).
    """
    if len(results) > 1 and self._select_session is None:
        per_session = [self._fetch_values_from_session(res_dict)
                       for res_dict in results]
        batched = list(zip(*per_session))
    else:
        batched = self._fetch_values_from_session(results[0])

    self.result = ExecutionResult(
        outputs=batched,
        losses=[],
        scalar_summaries=None,
        histogram_summaries=None,
        image_summaries=None)
def collect_results(self, results: List[Dict]) -> None:
    """Collect sampling temperatures and the MRT loss.

    The original looped over all sessions but overwrote its variables on
    every iteration, so effectively only the last session's values were
    used; that behavior is kept explicitly here. Summaries, when
    requested, are taken from the first session.
    """
    # Equivalent to the original overwrite-in-a-loop: keep the last session.
    last = results[-1]
    temps = last['temp']
    mrt_loss = last['loss']
    # NOTE: the original also decoded last['tar'] into sentences but never
    # used the result; that dead computation has been dropped.

    if self.scalar_summaries is None:
        scalar_summaries = None
        histogram_summaries = None
    else:
        # TODO collect summaries from different sessions
        scalar_summaries = results[0]['scalar_summaries']
        histogram_summaries = results[0]['histogram_summaries']

    self.result = ExecutionResult(
        outputs=[temps],
        # FIX: losses must be a list (a scalar breaks downstream code that
        # iterates over it, and every sibling runner passes a list).
        losses=[mrt_loss],
        scalar_summaries=scalar_summaries,
        histogram_summaries=histogram_summaries,
        image_summaries=None)
def collect_results(self, results: List[Dict]) -> None:
    """Process what the TF session returned.

    Only a single time step is always processed at once. First,
    distributions from all sessions are aggregated.
    """
    # Combine the ensemble: log-sum-exp of log-probs, then subtract
    # log(n) to get the average distribution in log space.
    summed_logprobs = -np.inf
    for sess_result in results:
        summed_logprobs = np.logaddexp(summed_logprobs,
                                       sess_result["logprobs"])
    avg_logprobs = summed_logprobs - np.log(len(results))

    expanded_batch = ExpandedBeamBatch(self._current_beam_batch,
                                       avg_logprobs)
    self._expanded.append(expanded_batch)

    # NOTE: "_to_exapand" is a (misspelled) instance attribute shared
    # with other methods of this class, so it cannot be renamed here.
    # Once all current hypotheses are consumed, advance one time step and
    # select the n best expansions to continue with.
    if not self._to_exapand:
        self._time_step += 1
        self._to_exapand = n_best(
            self._beam_size, self._expanded, self._beam_scoring_f)
        self._expanded = []

    # After the final time step, decode the best batch into sentences.
    if self._time_step == self._decoder.max_output:
        top_batch = self._to_exapand[-1].decoded.T
        decoded_tokens = self._vocabulary.vectors_to_sentences(top_batch)

        if self._postprocess is not None:
            decoded_tokens = [self._postprocess(seq)
                              for seq in decoded_tokens]

        loss = np.mean([res["xent"] for res in results])
        self.result = ExecutionResult(
            outputs=decoded_tokens,
            losses=[loss],
            scalar_summaries=None,
            histogram_summaries=None,
            image_summaries=None
        )
def collect_results(self, results: List[Dict]) -> None:
    """Backtrack the hypothesis of the configured rank from beam output."""
    if len(results) > 1:
        raise ValueError("Beam search runner does not support ensembling.")

    bs_out = results[0]["bs_outputs"]
    max_time = bs_out.scores.shape[0]

    # pick the end of the hypothesis based on its rank
    rank_idx = self._rank - 1
    hyp_index = np.argpartition(-bs_out.scores[-1], rank_idx)[rank_idx]
    bs_score = bs_out.scores[-1][hyp_index]

    # Follow parent pointers from the last step back to the first.
    output_tokens = []  # type: List[str]
    for time in reversed(range(max_time)):
        token_id = bs_out.token_ids[time][hyp_index]
        output_tokens.append(self._vocabulary.index_to_word[token_id])
        hyp_index = bs_out.parent_ids[time][hyp_index]
    output_tokens.reverse()

    # Keep only the tokens preceding the first END_TOKEN.
    before_eos_tokens = []  # type: List[str]
    for tok in output_tokens:
        if tok == END_TOKEN:
            break
        before_eos_tokens.append(tok)

    if self._postprocess is None:
        decoded_tokens = [before_eos_tokens]
    else:
        decoded_tokens = self._postprocess([before_eos_tokens])

    self.result = ExecutionResult(
        outputs=decoded_tokens,
        losses=[bs_score],
        scalar_summaries=None,
        histogram_summaries=None,
        image_summaries=None)
def collect_results(self, results: List[Dict]) -> None:
    """Greedy-decode CTC logits, rendering the extra index as "<BLANK>"."""
    if len(results) != 1:
        raise RuntimeError("CTCDebug runner does not support ensembling.")

    # argmax over the vocabulary axis; transpose to batch-major order.
    best_indices = np.argmax(results[0]["logits"], axis=2).T

    blank_id = len(self._vocabulary)
    decoded_batch = []
    for index_seq in best_indices:
        decoded_batch.append([
            "<BLANK>" if index == blank_id
            else self._vocabulary.index_to_word[index]
            for index in index_seq])

    self._result = ExecutionResult(
        outputs=decoded_batch,
        losses=[],
        scalar_summaries=None,
        histogram_summaries=None,
        image_summaries=None)
def join_execution_results(
        execution_results: List[ExecutionResult]) -> ExecutionResult:
    """Aggregate batch of execution results from a single runner."""
    first = execution_results[0]

    def flatten(series_batches: List[OutputSeries]) -> OutputSeries:
        """Join a list of batches of results into a flat list of outputs."""
        flat = []  # type: List[Any]
        for batch in series_batches:
            flat.extend(batch)
        # Stack numpy arrays along the first (batch) dimension; any other
        # content stays a plain list.
        if flat and isinstance(flat[0], np.ndarray):
            return np.array(flat)
        return flat

    outputs = {
        key: flatten([res.outputs[key] for res in execution_results])
        for key in first.outputs}  # type: Dict[str, Any]

    # Size-weighted average of every loss across the batches.
    weighted = {loss_id: 0. for loss_id in first.losses}
    for res in execution_results:
        for loss_id, value in res.losses.items():
            weighted[loss_id] += value * res.size

    total_size = sum(res.size for res in execution_results)
    losses = {loss_id: total / total_size
              for loss_id, total in weighted.items()}

    all_summaries = [
        summ for res in execution_results
        if res.summaries is not None
        for summ in res.summaries
    ]

    return ExecutionResult(outputs, losses, total_size, all_summaries)
def prepare_results(self):
    """Backtrack the single hypothesis of the configured rank.

    Selects the hypothesis index by rank from the final-step scores,
    walks the parent pointers backwards to recover the tokens, then cuts
    at END_TOKEN and drops padding.
    """
    max_time = self._step
    final_scores = self._scores[-1]
    vocab = self._decoder.vocabulary

    rank_idx = self._rank - 1
    hyp_idx = np.argpartition(-final_scores, rank_idx)[rank_idx]
    bs_score = final_scores[hyp_idx]

    # Walk the parent pointers from the last step to the first.
    reversed_tokens = []
    for time in reversed(range(max_time)):
        token_id = self._token_ids[time][hyp_idx]
        reversed_tokens.append(vocab.index_to_word[token_id])
        hyp_idx = self._parent_ids[time][hyp_idx]

    before_eos_tokens = []
    for tok in reversed(reversed_tokens):
        if tok == END_TOKEN:
            break
        # TODO: investigate why the decoder can start generating
        # padding before generating the END_TOKEN
        if tok != PAD_TOKEN:
            before_eos_tokens.append(tok)

    if self._postprocess is None:
        decoded_tokens = [before_eos_tokens]
    else:
        decoded_tokens = self._postprocess([before_eos_tokens])

    # TODO: provide better summaries in case (issue #599)
    # we want to use the runner during training.
    self.result = ExecutionResult(
        outputs=decoded_tokens,
        losses=[bs_score],
        scalar_summaries=None,
        histogram_summaries=None,
        image_summaries=None)