def evaluate_on_examples(self, step, examples, visualizer):
    """Decode a set of examples and compute evaluation metrics.

    Streams the examples in batches through the visualizer's decoder,
    accumulating its per-batch metrics, and additionally measures the
    value function (loss and average predicted value) on the examples
    harvested from each batch's beams.

    Args:
        step (int): current training step, used to tag the visualizer output
        examples (list): examples to decode
        visualizer: object that decodes batches and reports per-batch metrics

    Returns:
        Evaluation: aggregated metrics over all batches, including
            'valueFunctionLoss' and 'avgPredictedValue'
    """
    evaluation = Evaluation()
    examples = verboserate(examples, desc='Decoding {} examples'.format(
        visualizer.group_name))
    visualizer.reset(step=step)
    # loop-invariant: fetch the value function once, not per batch
    value_function = self.decoder._value_function
    for ex_batch in as_batches(examples, self.config.batch_size):
        beams, batch_evaluation = visualizer.predictions(ex_batch)
        evaluation.add_evaluation(batch_evaluation)

        # collect value function examples from the decoded beams
        # (zip, not itertools.izip, for consistency with _score_experiences
        # and Python 3 compatibility; behavior is identical here)
        vf_examples = []
        for example, beam in zip(ex_batch, beams):
            vf_examples.extend(
                ValueFunctionExample.examples_from_paths(beam, example))

        # compute ValueFunction metrics
        # NOTE(review): if a batch yields no vf_examples, np.mean of an
        # empty array is nan — presumably upstream guarantees non-empty;
        # verify against examples_from_paths.
        vf_loss = value_function.loss(vf_examples)
        predicted_values = value_function.values(
            [ex.case for ex in vf_examples])
        avg_predicted_value = np.mean(predicted_values)
        evaluation.add('valueFunctionLoss', vf_loss)
        evaluation.add('avgPredictedValue', avg_predicted_value)
    return evaluation
def train(self):
    """Run the main training loop until `config.max_iters` is reached.

    Each pass shuffles the training set, streams it in batches through
    the decoder, and periodically saves checkpoints and runs small/big
    evaluations according to `config.timing`. A final evaluation and
    checkpoint are performed before returning.
    """
    decoder = self.decoder
    timing = self.config.timing

    # baseline evaluation before any parameter updates
    self.evaluate(step=decoder.step)

    while True:
        # fresh random ordering of the full training set each epoch
        shuffled = random.sample(
            self.train_examples, k=len(self.train_examples))
        shuffled = verboserate(
            shuffled, desc='Streaming training Examples')

        for batch in as_batches(shuffled, self.config.batch_size):
            decoder.train_step(batch)
            step = decoder.step
            self.report_cache_stats(step)

            # periodic checkpointing and evaluation
            if (step + 1) % timing.save == 0:
                self.saver.save(step)
            if (step + 1) % timing.eval == 0:
                self.evaluate(step)
            if (step + 1) % timing.big_eval == 0:
                self.big_evaluate(step)

            # stop once the iteration budget is exhausted
            if step >= self.config.max_iters:
                self.evaluate(step)
                self.saver.save(step)
                return
def _score_experiences(self, experiences):
    """Score experiences by the log prob of their actions.

    Args:
        experiences (list[Experience])

    Returns:
        list[ScoredExperience]
    """
    scored = []
    for batch in as_batches(experiences, self._scoring_batch_size):
        states = [e.state for e in batch]
        # force the second DOM attention to select the correct elements
        dom_targets = [e.action.element for e in batch]
        type_targets = [
            e.action.text
            if isinstance(e.action, MiniWoBFocusAndType) else None
            for e in batch]

        scores_batch = self.score_actions(
            states, force_dom_attn=dom_targets,
            force_type_values=type_targets)
        assert len(batch) == len(scores_batch)

        for e, scores in zip(batch, scores_batch):
            scored.append(ScoredExperience(
                e.state, e.action, e.undiscounted_reward,
                scores.as_variables[e.action], scores.state_value,
                e.metadata))
    return scored
def test_as_batches():
    """as_batches chunks a sequence; the final batch may be short."""
    batches = list(as_batches(list(range(7)), 2))
    assert batches == [[0, 1], [2, 3], [4, 5], [6]]
def forward(self, dom_elem): """Embeds a batch of DOMElements. Args: dom_elem (list[list[DOMElement]]): batch of list of DOM. Each batch must already be padded to have the same number of DOM elements. Returns: Variable(FloatTensor): batch x num_dom_elems x embed_dim """ # Check that the batches are rectangular for dom_list in dom_elem: assert len(dom_list) == len(dom_elem[0]) num_dom_elems = len(dom_elem[0]) dom_elem = flatten(dom_elem) # (batch * max_dom_num) x lstm_dim text_embeddings = [] for batch in as_batches(dom_elem, 100): final_states, combined_states = self._utterance_embedder( [word_tokenize(dom.text) for dom in batch]) text_embeddings.append(final_states) text_embeddings = torch.cat(text_embeddings, 0) # (batch * max_dom_num) x tag_embed_dim tag_embeddings = self._tag_embedder.embed_tokens( [dom.tag for dom in dom_elem]) value_embeddings = self._value_embedder.embed_tokens( [bool(dom.value) for dom in dom_elem]) tampered_embeddings = self._tampered_embedder.embed_tokens( [dom.tampered for dom in dom_elem]) class_embeddings = self._classes_embedder.embed_tokens( [dom.classes for dom in dom_elem]) # (batch * max_dom_num) x 4 fg_colors = [ GPUVariable(torch.FloatTensor(elem.fg_color)) for elem in dom_elem ] fg_colors = torch.stack(fg_colors) bg_colors = [ GPUVariable(torch.FloatTensor(elem.bg_color)) for elem in dom_elem ] bg_colors = torch.stack(bg_colors) # (batch * max_dom_num) x 2 coords = [ GPUVariable( torch.FloatTensor((float(elem.left) / positions.IMAGE_COLS, float(elem.top) / positions.IMAGE_ROWS))) for elem in dom_elem ] coords = torch.stack(coords) # (batch * max_dom_num) * dom_embed_dim dom_embeddings = torch.cat( (text_embeddings, tag_embeddings, value_embeddings, tampered_embeddings, class_embeddings, coords, fg_colors, bg_colors), dim=1) # batch x max_dom_num x dom_embed_dim return dom_embeddings.view(-1, num_dom_elems, self.embed_dim)