def on_loader_end(self, runner: IRunner):
    """Assign pseudo-labels to the unlabeled dataset from accumulated predictions.

    Samples whose top-class probability strictly exceeds
    ``self.prob_threshold`` receive their argmax class; all others are marked
    with ``self.unlabeled_class``. Aggregate confidence statistics are written
    to ``runner.loader_metrics``.

    Only runs when the finished loader is ``self.pseudolabel_loader``.
    """
    if runner.loader_name != self.pseudolabel_loader:
        return

    # assumes self.predictions accumulated per-sample class probabilities
    # of shape (num_samples, num_classes) — TODO confirm against on_batch_end
    predictions = np.array(self.predictions)
    max_pred = np.argmax(predictions, axis=1)
    max_score = np.amax(predictions, axis=1)
    confident_mask = max_score > self.prob_threshold
    num_samples = len(predictions)

    # BUG FIX: the original wrote zip(range(num_samples, max_pred, max_score)),
    # i.e. passed all three sequences to range() and zipped the single result —
    # a TypeError at runtime. The closing parenthesis belongs after
    # range(num_samples).
    for index, predicted_target, confident in zip(
            range(num_samples), max_pred, confident_mask):
        target = predicted_target if confident else self.unlabeled_class
        self.unlabeled_ds.set_target(index, target)

    num_confident_samples = confident_mask.sum()
    runner.loader_metrics[
        "pseudolabeling/confident_samples"] = num_confident_samples
    runner.loader_metrics[
        "pseudolabeling/confident_samples_mean_score"] = max_score[
            confident_mask].mean()
    runner.loader_metrics["pseudolabeling/unconfident_samples"] = (
        num_samples - num_confident_samples)
    runner.loader_metrics[
        "pseudolabeling/unconfident_samples_mean_score"] = max_score[
            ~confident_mask].mean()
def run_catalyst(irunner: dl.IRunner,
                 idx: int,
                 device: str = "cuda",
                 num_epochs: int = 10):
    """Train ``irunner``'s model for ``num_epochs`` with a Catalyst runner.

    Returns a tuple of (final train accuracy@1, final train loss,
    used memory as reported by ``_get_used_memory``).
    """
    utils.set_global_seed(idx)
    train_loader = irunner.get_loaders()["train"]
    model = irunner.get_model().to(device)
    criterion = irunner.get_criterion()
    optimizer = irunner.get_optimizer(model)

    engine = dl.GPUEngine() if device == "cuda" else dl.CPUEngine()
    runner = dl.SupervisedRunner()
    # NOTE(review): reaches into runner privates (_output_key/_target_key);
    # kept as-is for parity with the rest of the benchmark code.
    accuracy_callback = dl.AccuracyCallback(
        input_key=runner._output_key,
        target_key=runner._target_key,
        topk=(1, ),
    )
    runner.train(
        engine=engine,
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders={"train": train_loader},
        num_epochs=num_epochs,
        verbose=False,
        callbacks=[accuracy_callback],
    )

    train_metrics = runner.epoch_metrics["train"]
    return (
        train_metrics["accuracy01"],
        train_metrics["loss"],
        _get_used_memory(),
    )
def on_epoch_end(self, runner: dl.IRunner):
    """Log replay-buffer statistics, sync the checkpoint, and refresh the buffer index."""
    epoch_metrics = runner.epoch_metrics["_epoch_"]
    buffer = self.replay_buffer

    epoch_metrics["num_trajectories"] = buffer.num_trajectories
    epoch_metrics["num_transitions"] = buffer.num_transitions
    epoch_metrics["updates_per_sample"] = (
        runner.loader_sample_step / buffer.num_transitions)
    # Mean reward over (up to) the last 100 collected trajectories.
    epoch_metrics["reward"] = np.mean(buffer._trajectories_rewards[-100:])

    self._sync_checkpoint(runner=runner)
    buffer.recalculate_index()
def on_epoch_end(self, runner: dl.IRunner):
    """Roll out validation sessions and log their mean reward and length."""
    self.actor.eval()
    rewards, steps = generate_sessions(
        env=self.env,
        network=self.actor,
        num_sessions=self.num_valid_sessions,
    )
    self.actor.train()

    n_sessions = float(self.num_valid_sessions)
    runner.epoch_metrics["_epoch_"]["v_reward"] = rewards / n_sessions
    runner.epoch_metrics["_epoch_"]["v_steps"] = steps / n_sessions
def on_epoch_start(self, runner: dl.IRunner):
    """Collect training rollouts into the buffer and log mean reward and length."""
    self.actor = runner.model
    self.actor.eval()
    rewards, steps = generate_sessions(
        env=self.env,
        network=self.actor,
        rollout_buffer=self.rollout_buffer,
        num_sessions=self.num_train_sessions,
    )

    n_sessions = float(self.num_train_sessions)
    runner.epoch_metrics["_epoch_"]["t_reward"] = rewards / n_sessions
    runner.epoch_metrics["_epoch_"]["t_steps"] = steps / n_sessions
    self.actor.train()
def on_epoch_end(self, runner: dl.IRunner):
    """Run validation rollouts and log session/sample counters plus mean rewards."""
    self.actor.eval()
    rewards, steps = generate_sessions(
        env=self.env,
        network=self.actor,
        num_sessions=int(self.num_valid_sessions),
    )
    self.actor.train()

    n_sessions = float(self.num_valid_sessions)
    epoch_metrics = runner.epoch_metrics["_epoch_"]
    epoch_metrics["num_sessions"] = self.session_counter
    epoch_metrics["num_samples"] = self.session_steps
    epoch_metrics["updates_per_sample"] = (
        runner.loader_sample_step / self.session_steps)
    epoch_metrics["v_reward"] = rewards / n_sessions
    epoch_metrics["v_steps"] = steps / n_sessions
def on_loader_end(self, runner: IRunner):
    """Compute the epoch metric from accumulated outputs/targets and log it under ``self.prefix``."""
    outputs = np.array(self.outputs)
    targets = np.array(self.targets)
    runner.loader_metrics[self.prefix] = self.metric_fn(outputs, targets)
def on_batch_start(self, state: IRunner):
    """Apply mixup to the configured input fields with probability ``self.p``."""
    if not self.is_needed:
        return

    # Sample the mixing coefficient; lam == 1 leaves the batch unchanged.
    if np.random.random() < self.p:
        lam = np.random.beta(self.alpha, self.alpha)
    else:
        lam = 1

    batch_size = state.input[self.fields[0]].shape[0]
    index = torch.randperm(batch_size).to(state.device)
    state.input["mixup_index"] = index
    state.input["mixup_lambda"] = lam

    # Convex combination of each field with its permuted counterpart.
    for field in self.fields:
        mixed = lam * state.input[field] + (1 - lam) * state.input[field][index]
        state.input[field] = mixed
def run_pytorch(irunner: dl.IRunner,
                idx: int,
                device: str = "cuda",
                num_epochs: int = 10):
    """Train ``irunner``'s model with a plain PyTorch training loop.

    Mirrors ``run_catalyst`` so the two can be benchmarked against each other.

    Returns a tuple of (final epoch accuracy, final epoch mean loss,
    used memory as reported by ``_get_used_memory``).
    """
    device = torch.device(device)
    utils.set_global_seed(idx)
    loader = irunner.get_loaders()["train"]
    model = irunner.get_model().to(device)
    criterion = irunner.get_criterion()
    optimizer = irunner.get_optimizer(model)

    epoch_scores = []
    epoch_losses = []
    for i in range(num_epochs):
        epoch_score = 0
        epoch_loss = 0
        for features, targets in loader:
            features = features.to(device)
            targets = targets.to(device)
            logits = model(features)
            loss = criterion(logits, targets)
            epoch_loss += loss.item()
            pred = logits.argmax(dim=1, keepdim=True)
            epoch_score += pred.eq(targets.view_as(pred)).sum().item()
            # BUG FIX: the original called ``self.engine.backward(loss)`` —
            # ``self`` does not exist in this free function (NameError at
            # runtime). Plain autograd backward is the correct counterpart.
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        epoch_score /= len(loader.dataset)
        epoch_loss /= len(loader)
        print(f"Epoch {i} \t Score: {epoch_score} \t Loss: {epoch_loss}")
        epoch_scores.append(epoch_score)
        epoch_losses.append(epoch_loss)
    return epoch_scores[-1], epoch_losses[-1], _get_used_memory()
def on_batch_end(self, runner: IRunner):
    """Request early stop when a monitored batch metric contains NaN.

    Monitors ``self.metrics_to_monitor`` when set, otherwise every key in
    ``runner.batch_metrics``.
    """
    keys = (self.metrics_to_monitor
            if self.metrics_to_monitor is not None
            else runner.batch_metrics.keys())
    for key in keys:
        if not _any_is_nan(runner.batch_metrics[key]):
            continue
        print(
            f"Stopping training due to NaN presence in {key} metric at epoch {runner.global_epoch}."
            f"batch_metrics={{{runner.batch_metrics}}}")
        runner.need_early_stop = True
def test_benchmark(
    tmpdir,
    irunner: dl.IRunner,
    device: str,
    num_epochs: int,
    num_runs: int,
    precision: int,
    max_diff_time: float,
    max_diff_memory: float,
):
    """Benchmark Catalyst against a plain PyTorch loop on the same setup.

    Asserts that per-run scores and losses agree to ``precision`` decimals,
    that the time difference (normalized by ``num_epochs``) stays within
    ``max_diff_time``, and — when CUDA is available — that memory usage stays
    within ``max_diff_memory`` relative difference.
    """
    irunner = irunner()
    # prepare data
    _ = irunner.get_loaders()

    # score runs
    pytorch = score_runs(
        irunner,
        mode=RunMode.pytorch,
        device=device,
        num_epochs=num_epochs,
        num_runs=num_runs,
    )
    catalyst = score_runs(
        irunner,
        mode=RunMode.catalyst,
        device=device,
        num_epochs=num_epochs,
        num_runs=num_runs,
    )

    # check performance
    print("Scores are for... \n "
          f"PyTorch: {pytorch['scores']} \n Catalyst: {catalyst['scores']}")
    for c_score, p_score in zip(catalyst["scores"], pytorch["scores"]):
        np.testing.assert_almost_equal(c_score, p_score, precision)

    # check loss
    print("Losses are for... \n "
          f"PyTorch: {pytorch['losses']} \n Catalyst: {catalyst['losses']}")
    for c_loss, p_loss in zip(catalyst["losses"], pytorch["losses"]):
        np.testing.assert_almost_equal(c_loss, p_loss, precision)

    # check time
    print(
        f"Times are for... \n PyTorch: {pytorch['time']} \n Catalyst: {catalyst['time']}"
    )
    assert_absolute_equal(
        catalyst["time"],
        pytorch["time"],
        norm=num_epochs,
        max_diff=max_diff_time,
    )

    # check memory
    if torch.cuda.is_available():
        print(
            "Memory usages are for... \n "
            f"PyTorch: {pytorch['memory']} \n Catalyst: {catalyst['memory']}")
        assert_relative_equal(
            catalyst["memory"], pytorch["memory"], max_diff=max_diff_memory)
def on_epoch_start(self, runner: IRunner):
    """At the start of a train epoch, recompute and log the TSA threshold."""
    if runner.loader_name != "train":
        return
    # Exponential schedule from 1/num_classes up to 1.0.
    self.tsa_threshold = self.get_tsa_threshold(
        runner.epoch, "exp_schedule", 1.0 / self.num_classes, 1.0)
    runner.epoch_metrics["train"]["tsa_threshold"] = self.tsa_threshold