# Assumes the surrounding module's imports: os, time, sling, and the helpers
# Corpora, now(), mem(), and utils (frame_evaluation).
def dev_accuracy(commons_path, dev_path, tmp_folder, caspar):
  dev = Corpora(dev_path, caspar.spec.commons)
  print("Annotating dev documents", now(), mem())

  test_path = os.path.join(tmp_folder, "dev.annotated.rec")
  writer = sling.RecordWriter(test_path)
  count = 0
  start_time = time.time()
  cascade = caspar.spec.cascade
  dev_total = [0] * cascade.size()
  dev_disallowed = [0] * cascade.size()

  for document in dev:
    # Run the parser in evaluation mode, collecting the annotated state and
    # trace plus per-delegate counts of disallowed leaf actions.
    state, disallowed, total, trace = \
        caspar.forward(document, train=False, debug=True)
    state.write()
    trace.write()
    writer.write(str(count), state.encoded())
    count += 1
    if count % 100 == 0:
      print(" Annotated", count, "documents", now(), mem())
    for i, c in enumerate(disallowed):
      dev_total[i] += total[i]
      dev_disallowed[i] += c

  writer.close()
  end_time = time.time()
  print("Annotated", count, "documents in", "%.1f" % (end_time - start_time),
        "seconds", now(), mem())
  print("Disallowed/Total leaf actions for", cascade.__class__.__name__)
  for i, c in enumerate(dev_disallowed):
    print("Delegate", i, "disallowed", c, "out of", dev_total[i])

  return utils.frame_evaluation(gold_corpus_path=dev_path,
                                test_corpus_path=test_path,
                                commons_path=commons_path)
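# A hypothetical usage sketch, not part of the original file: the paths and
# the trained `trained_caspar` object are assumptions made for illustration.
#
#   metrics = dev_accuracy(commons_path="local/data/commons",
#                          dev_path="local/data/dev.rec",
#                          tmp_folder="/tmp/caspar",
#                          caspar=trained_caspar)
#
# This writes dev.annotated.rec under tmp_folder, prints per-delegate
# disallowed-action counts, and returns whatever utils.frame_evaluation()
# reports for the gold vs. annotated corpora.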
def update(self):
  if self.count > self.last_update_count:
    self.last_update_count = self.count
    start = time.time()

    objective = self.batch_losses.average()

    # Add the L2 regularization penalty to the objective. Var is assumed to
    # be torch.autograd.Variable, aliased elsewhere in the module.
    l2 = Var(torch.Tensor([0.0]))
    if self.hparams.l2_coeff > 0.0:
      for p in self.model.regularized_params:
        l2 += 0.5 * self.hparams.l2_coeff * torch.sum(p * p)
      objective += l2

    objective /= 3.0  # for parity with the TF implementation
    value = objective.item()

    # Compute gradients.
    objective.backward()

    # Clip them.
    self.clip_gradients()

    # Apply them.
    self.optimizer.step()

    # Done for this batch; prepare for the next one.
    self._reset()
    end = time.time()

    num_batches = self.count // self.hparams.batch_size
    if self.hparams.moving_avg:
      # Update the exponential moving averages of the parameters, using a
      # more conservative decay factor in the first few batches.
      decay = self.hparams.moving_avg_coeff
      decay2 = (1.0 + num_batches) / (10.0 + num_batches)
      if decay > decay2:
        decay = decay2
      for name, p in self.model.named_parameters():
        if p.requires_grad and name in self.averages:
          diff = (self.averages[name] - p.data) * (1 - decay)
          self.averages[name].sub_(diff)

    print(now(), "BatchLoss after", "(%d" % num_batches,
          "batches =", self.count, "examples): %.4f" % value,
          "incl. L2=%.6f" % (l2 / 3.0).item(),
          "(%.1f" % (end - start), "secs)", mem())
    sys.stdout.flush()
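# A minimal sketch of the effective decay schedule used by update() above:
# the moving average uses min(moving_avg_coeff, (1 + n) / (10 + n)), so it
# tracks the raw parameters closely early in training and only approaches
# moving_avg_coeff later. This helper and its default value of 0.9999 are
# illustrative assumptions; the real coefficient comes from hparams.
def _effective_decay(num_batches, moving_avg_coeff=0.9999):
  return min(moving_avg_coeff, (1.0 + num_batches) / (10.0 + num_batches))

# For example:
#   _effective_decay(1)       -> 0.1818...
#   _effective_decay(100)     -> 0.9181...
#   _effective_decay(100000)  -> 0.9999 (capped at moving_avg_coeff)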