def post_execution_hook(self, **kwargs):
    """Evaluate the trained classifiers on the test and OOV sets, then export.

    Records validation accuracy on the regular test dataloader and on an
    out-of-vocabulary (OOV) set built from the current version's ``oov_df``
    factory, logs both metrics, and exports the forward and backward
    classifiers to pickle files in the experiment directory.

    :param kwargs: unused; accepted for hook-interface compatibility.
    """
    metrics = MetricContainer(
        ['validation_accuracy_test', 'validation_accuracy_test_OOV'])
    metrics.validation_accuracy_test.update(
        self._get_accuracy(self.data_class.valid_dl).item(), 1)

    # Build the OOV databunch reusing the language model's vocab so that
    # tokenization/numericalization matches what the classifier was
    # trained on. The test input serves as both train and valid frames
    # because only the valid_dl accuracy is consumed below.
    dl = self.current_version["oov_df"]()
    oov_db = TextDataBunch.from_df(path=self.experiment_dir,
                                   train_df=dl.get_test_input(),
                                   valid_df=dl.get_test_input(),
                                   text_cols='utterance',
                                   label_cols='functions',
                                   vocab=self.data_lm.train_ds.vocab,
                                   bs=BATCH_SIZE)
    metrics.validation_accuracy_test_OOV.update(
        self._get_accuracy(oov_db.valid_dl).item(), 1)
    metrics.log_metrics()

    # Persist both directions of the classifier for later inference.
    self.class_fwd.export('export_fwd.pkl')
    self.class_bwd.export('export_bwd.pkl')
def train_loop(self, input_fn):
    """Demo training loop exercising the grouped MetricContainer API.

    Runs a dummy 6x6 epoch/step loop that updates metrics ``a`` and ``b``,
    periodically resets them, logs selected metrics per epoch and all
    metrics at the end, then copies exported artifacts.

    :param input_fn: zero-argument callable invoked once before looping.
    """
    # NOTE(review): the original built a throwaway container
    # (metrics=['1', 'b', 'c']) and immediately rebound the name; that
    # dead assignment has been removed.
    metric_container = MetricContainer(metrics=[{
        'metrics': ['a', 'b', 'c']
    }, {
        'metrics': ['2', 'd', 'e'],
        'track_average_epoch_count': 10
    }], track_average_epoch_count=5)
    self.log("calling input fn")
    input_fn()
    for epoch in iterator(range(6)):
        for idx in iterator(range(6), 2):
            metric_container.a.update(idx)
            metric_container.b.update(idx * 2)
            self.log("Epoch: {} step: {}".format(epoch, idx))
            self.log("a {}".format(metric_container.a.avg()))
            self.log("b {}".format(metric_container.b.avg()))
            # Exercise the mid-epoch reset path every third step.
            if idx % 3 == 0:
                metric_container.reset()
        metric_container.log_metrics(['a', '2'])
        metric_container.reset_epoch()
    metric_container.log_metrics()
    self.log("trained: {}".format(self.model.train()))
    self.copy_related_files("experiments/exports")
def _compare_data(gt_data, predicted_data, root_dir=None):
    """Score predictions against ground truth in parallel and log metrics.

    Fans every entry of ``predicted_data`` out to a worker pool, where
    ``_process`` computes ``(precision, recall, dcg, skipped)`` per item.
    Skipped items only increment the ``skipped`` metric; all others also
    accumulate precision/recall/DCG. Aggregates are logged at the end.

    :param gt_data: ground-truth dataset passed through to ``_process``.
    :param predicted_data: predictions; must expose ``name_to_idx``.
    :param root_dir: unused; kept for interface compatibility.
    """
    mc = MetricContainer(["precision", "recall", "dcg", "skipped"])
    n_proc = 7
    with multiprocessing.Pool(n_proc) as p:
        # imap_unordered: result order is irrelevant (only aggregates are
        # kept) and a large chunksize amortizes IPC overhead.
        map_fn = lambda pr, i: p.imap_unordered(pr, i, chunksize=100)
        for out in tqdm(map_fn(
                _process(predicted_data, gt_data),
                iterator(predicted_data.name_to_idx.items(), None)),
                total=len(predicted_data.name_to_idx)):
            precision, recall, dcg, skipped = out
            if skipped:
                mc.skipped.update(1)
                continue
            mc.skipped.update(0)
            mc.precision.update(precision)
            mc.recall.update(recall)
            mc.dcg.update(dcg)
    mc.log_metrics()
def train_loop(self, input_fn, **kwargs):
    """Train the reconstruction model for the remaining epochs.

    Resumes from ``self.epochs_params``: for each batch, runs the model on
    ``oi`` and optimizes reconstruction loss against ``i``. Logs metrics
    and shows a reconstruction every 100 steps, checkpoints every 5 epochs,
    and logs an epoch summary after each epoch.

    :param input_fn: iterable yielding ``(name, i, oi)`` CPU tensors.
    :param kwargs: unused; accepted for interface compatibility.
    """
    metric_container = MetricContainer(["loss"])
    epochs = self.current_version.epocs
    self.log("Epochs: {}".format(epochs))
    # Epochs still to run given how many were already completed; always
    # run at least one.
    epochs_end = epochs - self.epochs_params - 1
    if epochs_end < 1:
        epochs_end = 1
    self.log("Remaining epochs: {}".format(epochs_end))
    self.model.train()
    for epoch in iterator(
            range(self.epochs_params, self.epochs_params + epochs_end), 1):
        metric_container.reset_epoch()
        # Pre-initialize so the epoch summary below cannot NameError when
        # input_fn yields no batches (previously only set inside the loop).
        out_string_step = "Epoch: {} Step: 0".format(epoch + 1)
        for idx, (name, i, oi) in iterator(enumerate(input_fn), 20):
            i = i.cuda()
            oi = oi.cuda()
            out = self.model(oi)
            loss = self.criterion(out, i)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            metric_container.loss.update(loss.item(), 1)
            if idx % 100 == 0:
                imshow_tensor(out, i.shape)
                out_string_step = "Epoch: {} Step: {}".format(
                    epoch + 1, idx + 1)
                self.log("----------------------", log_to_file=False)
                self.log(out_string_step, log_to_file=False)
                metric_container.log_metrics(log_to_file=False)
                metric_container.reset()
        if epoch % 5 == 0:
            self.save_checkpoint(epoch)
        self.log("=========== Epoch Stats: ===========", log_to_file=False)
        self.log(out_string_step, log_to_file=True)
        # 'charachters_per_row' spelling matches the MetricContainer API.
        metric_container.log_metrics(metrics=None,
                                     log_to_file=True,
                                     complete_epoch=True,
                                     items_per_row=5,
                                     charachters_per_row=100,
                                     step=epoch + 1)
        self.log("=========== Epoch Ends =============", log_to_file=False)
        metric_container.reset_epoch()
def train_loop(self, input_fn, **kwargs):
    """Train the detection model, skipping exploding-loss and failed steps.

    For each batch: stacks the images, runs the model with targets to get a
    loss dict, backprops the summed loss, and steps the optimizer unless the
    loss exceeds a threshold. RuntimeErrors (typically CUDA OOM) are logged
    and the step is skipped. Visualizes detections every 50 steps, logs
    metrics every 500, checkpoints every 6000 steps and at each epoch end,
    then logs an epoch summary with miss counts.

    :param input_fn: sized iterable yielding ``(name, images, targets)``.
    :param kwargs: unused; accepted for interface compatibility.
    """
    metric_container = MetricContainer(self.metrics)
    epochs = self.current_version.epocs
    self.log("Epochs: {}".format(epochs))
    epochs_end = epochs - self.epochs_params - 1
    if epochs_end < 1:
        epochs_end = 1
    self.log("Remaining epochs: {}".format(epochs_end), log_to_file=True)
    self.log("Steps per epoch: {}".format(len(input_fn)), log_to_file=True)
    self.model.train()
    for epoch in iterator(
            range(self.epochs_params, self.epochs_params + epochs_end), 1):
        metric_container.reset_epoch()
        epoch_misses_loss = 0
        epoch_misses_cuda = 0
        # Pre-initialize so the epoch summary cannot NameError when
        # input_fn yields no batches.
        idx = -1
        for idx, (name, i, targets) in iterator(enumerate(input_fn), 20):
            i = torch.stack(i).cuda()
            try:
                out = self.model(i, targets=targets)
                loss = sum(list(out.values()))
                self.optimizer.zero_grad()
                loss.backward()
                if loss.item() > 50:
                    # Exploding loss: skip the optimizer step but still
                    # count the miss. (Message fixed: it skips, not breaks.)
                    val = loss.detach().item()
                    self.log(
                        "loss over threshold, skipping step: {}".format(val),
                        level=40,
                        log_to_file=True)
                    epoch_misses_loss += 1
                    continue
                self.optimizer.step()
            except RuntimeError as e:
                tb = traceback.format_exc()
                self.log(f"Failed step: RuntimeError {e}",
                         level=40,
                         log_to_file=True)
                self.log(f"traceback: \n {tb}", level=40, log_to_file=True)
                self.log(
                    "Error happened with \nnames:{name}".format(name=name),
                    level=40,
                    log_to_file=True)
                self.log("Lengths: {}".format(
                    [len(el["boxes"]) for el in targets]),
                         level=40,
                         log_to_file=True)
                epoch_misses_cuda += 1
                # BUG FIX: previously fell through and updated the metrics
                # with a stale (or, on the first step, undefined) out/loss.
                continue
            metric_container.update({k: v.item() for k, v in out.items()}, 1)
            metric_container.loss.update(loss.item(), 1)
            if idx % 50 == 0:
                # Periodic qualitative check on a single sample; eval mode
                # makes the model return detections instead of losses.
                self.model.eval()
                name = name[:1]
                i = i[:1]
                targets = targets[:1]
                out = self.model(i)
                visualize_objects(name,
                                  i,
                                  out,
                                  targets,
                                  10,
                                  labels_to_class=self.labels_to_class,
                                  colors=self.colors)
                self.model.train()
            if idx % 500 == 0:
                # Global step for the metric logger: samples seen so far.
                step = epoch * self.dataloader.get_train_sample_count() + idx
                out_string_step = "Epoch: {} Step: {}".format(
                    epoch + 1, idx + 1)
                self.log("----------------------", log_to_file=False)
                self.log(out_string_step, log_to_file=False)
                metric_container.log_metrics(log_to_file=True, step=step)
                metric_container.reset()
            if idx % 6000 == 0 and idx > 0:
                self.save_checkpoint(epoch)
        self.save_checkpoint(epoch)
        self.log("=========== Epoch Stats: ===========", log_to_file=False)
        out_string_step = "Epoch: {} Step: {}".format(epoch + 1, idx + 1)
        self.log(out_string_step, log_to_file=True)
        # 'charachters_per_row' spelling matches the MetricContainer API.
        metric_container.log_metrics(metrics=None,
                                     log_to_file=True,
                                     complete_epoch=True,
                                     items_per_row=5,
                                     charachters_per_row=100,
                                     name_prefix="epoch_",
                                     step=epoch + 1)
        self.log(f"steps missed: {epoch_misses_loss + epoch_misses_cuda}",
                 log_to_file=True)
        self.log(f"    loss threshold: {epoch_misses_loss}", log_to_file=True)
        self.log(f"    cuda memory: {epoch_misses_cuda}", log_to_file=True)
        self.log("=========== Epoch Ends =============", log_to_file=False)
        metric_container.reset_epoch()