def create_baseline_trainer(model, optimizer=None, name='train', device=None):
    if device is not None:
        model.to(device)
    is_train = optimizer is not None

    def _update(engine, batch):
        model.train(is_train)
        with torch.set_grad_enabled(is_train):
            images, labels = convert_tensor(batch, device=device)
            preds = model(images)
            loss = F.cross_entropy(preds, labels)
            if is_train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
        return {'loss': loss.item(), 'y_pred': preds, 'y': labels}

    engine = Engine(_update)
    engine.name = name
    metrics.Average(lambda o: o['loss']).attach(engine, 'single_loss')
    metrics.Accuracy(lambda o: (o['y_pred'], o['y'])).attach(engine, 'single_acc')
    return engine
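# Usage sketch for create_baseline_trainer (added for illustration, not part of
# the original snippet): the toy model, data and hyperparameters below are
# assumptions; only the trainer/evaluator calls mirror the function above.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from ignite.engine import Events

# Toy 10-class classifier on random "images", purely for the demo.
model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
data = TensorDataset(torch.randn(256, 1, 28, 28), torch.randint(0, 10, (256,)))
train_loader = DataLoader(data, batch_size=32, shuffle=True)
val_loader = DataLoader(data, batch_size=32)

trainer = create_baseline_trainer(
    model, optimizer=torch.optim.SGD(model.parameters(), lr=0.1), name='train')
evaluator = create_baseline_trainer(model, optimizer=None, name='eval')

@trainer.on(Events.EPOCH_COMPLETED)
def _evaluate(engine):
    evaluator.run(val_loader)
    print(f"epoch {engine.state.epoch}: "
          f"train loss {engine.state.metrics['single_loss']:.4f}, "
          f"eval acc {evaluator.state.metrics['single_acc']:.4f}")

trainer.run(train_loader, max_epochs=2)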
def more_metrics(self, metrics_: OrderedDict):
    metrics_['loss'] = metrics.Loss(nn.CrossEntropyLoss())
    metrics_['accuracy'] = metrics.Accuracy()
    metrics_['recall'] = metrics.Recall()
    metrics_['precision'] = metrics.Precision()
    metrics_['confusion_matrix'] = metrics.ConfusionMatrix(8, average='recall')
def get_metric(metric):
    if metric == 'mse':
        return M.MeanSquaredError()
    elif metric == 'xent':
        return M.Loss(nn.CrossEntropyLoss())
    elif metric == 'acc':
        return M.Accuracy()
    raise ValueError('Unrecognized metric {}.'.format(metric))
def get_metrics_fn() -> Dict[str, _metrics.Metric]:
    def rounded_transform(output):
        y_pred, y = output
        return torch.round(y_pred), y

    transform = rounded_transform
    accuracy = _metrics.Accuracy(transform, device=self.device)
    precision = _metrics.Precision(transform, device=self.device)
    recall = _metrics.Recall(transform, device=self.device)
    # F1 composed from precision and recall via ignite metric arithmetic;
    # the small epsilon guards against division by zero.
    f1 = precision * recall * 2 / (precision + recall + 1e-20)
    return {
        'loss': _metrics.Loss(loss_fn),
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }
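# Standalone sketch of the metric arithmetic used above (added for
# illustration; the engine and data here are invented). Combining ignite
# metrics with *, +, / yields a MetricsLambda, so the composed F1 score
# attaches to an engine exactly like a primitive metric.
import torch
from ignite.engine import Engine
from ignite.metrics import Precision, Recall

def _identity_step(engine, batch):
    # each batch is already a (y_pred, y) pair of 0/1 tensors in this toy setup
    return batch

engine = Engine(_identity_step)
precision = Precision()
recall = Recall()
f1 = precision * recall * 2 / (precision + recall + 1e-20)
f1.attach(engine, 'f1')  # also attaches the underlying precision/recall updates

state = engine.run([(torch.tensor([1, 0, 1, 1]), torch.tensor([1, 0, 0, 1]))],
                   max_epochs=1)
print(state.metrics['f1'])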
def get_metric(metric):
    name = metric['name']
    params = metric['params']
    if name == 'mse':
        return M.MeanSquaredError(**params)
    elif name == 'vae':
        return M.Loss(GaussianVAELoss(**params))
    elif name == 'kl-div':
        return M.Loss(GaussianKLDivergence(**params))
    elif name == 'recons_nll':
        return M.Loss(ReconstructionNLL(**params))
    elif name == 'bxent':
        return M.Loss(nn.BCEWithLogitsLoss(**params))
    elif name == 'xent':
        return M.Loss(nn.CrossEntropyLoss(**params))
    elif name == 'acc':
        return M.Accuracy(**params)
    raise ValueError('Unrecognized metric {}.'.format(metric))
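# Usage sketch for the config-driven factory above (added for illustration):
# the metric specs are assumed to come from an experiment config, e.g. parsed
# JSON/YAML, each entry providing a 'name' and a 'params' dict.
metric_specs = [
    {'name': 'xent', 'params': {}},
    {'name': 'acc', 'params': {}},
    {'name': 'mse', 'params': {}},
]
eval_metrics = {spec['name']: get_metric(spec) for spec in metric_specs}
# each value is an ignite metric, ready to attach to an evaluator engine:
# for key, metric in eval_metrics.items():
#     metric.attach(evaluator, key)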
def run_ort(args, params, create_new_dir=True):
    reset_global_worth_manager()
    params = params.copy()
    params["model_dir"] = os.path.join(args["model_dir"], params["exp_name"])
    if create_new_dir:
        timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
        params["model_dir"] = os.path.join(params["model_dir"], timestamp)
    os.makedirs(params["model_dir"], exist_ok=True)
    init_logger(params["model_dir"])
    logger = logging.getLogger(__name__)

    # Save model parameter settings to the model directory.
    with open(os.path.join(params["model_dir"], "model_params.json"), "w") as out_fh:
        json.dump(params, out_fh, indent=2, skipkeys=True)
    logger.info("Model parameters:")
    logger.info(json.dumps(params, indent=2, sort_keys=True))

    # Replace parameters that aren't JSON serializable with their real objects.
    params["device"] = torch.device(params["device"])

    dataset = FashionMNIST(params)

    # Calculate the number of steps between summaries so that the number of
    # summaries per epoch stays constant.
    summaries_per_epoch = 3
    save_summary_steps = math.ceil(dataset.steps_per_epoch / summaries_per_epoch)
    params["total_steps"] = dataset.steps_per_epoch * params["train_epochs"]
    get_worth_manager().load_hparams(params)

    # Construct the estimator.
    model = FFNN(params).to(params["device"])
    estimator_config = EstimatorConfig(model_dir=params["model_dir"],
                                       device=params["device"],
                                       save_summary_steps=save_summary_steps)
    estimator = Estimator(model=model,
                          params=params,
                          config=estimator_config,
                          eval_data_iter=dataset.get_eval_iterator())
    estimator.add_metric("accuracy", metrics.Accuracy())
    logger.info(model)

    train_loop(params, estimator, dataset)
class Metrics(enum.Enum):
    train_class_metrics: t.Dict[str, im.Metric] = {
        'acc_1': im.RunningAverage(im.Accuracy(output_transform=lambda x: x[1:3])),
        'acc_5': im.RunningAverage(
            im.TopKCategoricalAccuracy(k=5, output_transform=lambda x: x[1:3])),
        'ce_loss': im.RunningAverage(output_transform=lambda x: x[0]),
        'total_loss': im.RunningAverage(output_transform=lambda x: x[0]),
    }
    train_ae_metrics: t.Dict[str, im.Metric] = {
        'acc_1': im.RunningAverage(
            im.Accuracy(output_transform=lambda x: (x[1], x[5]))),
        'acc_5': im.RunningAverage(
            im.TopKCategoricalAccuracy(k=5, output_transform=lambda x: (x[1], x[5]))),
        'ce_loss': train_ae_ce_loss,
        'l1_loss': train_ae_l1_loss,
        'total_loss': train_ae_total_loss,
    }
    train_gsnn_metrics: t.Dict[str, im.Metric] = {
        'acc_1': im.RunningAverage(
            im.Accuracy(output_transform=lambda x: (x[0].squeeze(dim=1), x[6]))),
        'acc_5': im.RunningAverage(
            im.TopKCategoricalAccuracy(
                k=5, output_transform=lambda x: (x[0].squeeze(dim=1), x[6]))),
        'ce_loss': train_gsnn_ce_loss,
        'kld_loss': train_gsnn_kld_loss,
        'total_loss': train_gsnn_total_loss,
        'kld_factor': train_gsnn_kld_factor,
    }
    train_vae_metrics: t.Dict[str, im.Metric] = {
        'acc_1': im.RunningAverage(
            im.Accuracy(output_transform=lambda x: (x[1].squeeze(dim=1), x[7]))),
        'acc_5': im.RunningAverage(
            im.TopKCategoricalAccuracy(
                k=5, output_transform=lambda x: (x[1].squeeze(dim=1), x[7]))),
        'ce_loss': train_vae_ce_loss,
        'l1_loss': train_vae_l1_loss,
        'kld_loss': train_vae_kld_loss,
        'total_loss': train_vae_total_loss,
        'kld_factor': train_vae_kld_factor,
    }
    eval_class_metrics = {
        'acc_1': im.Accuracy(output_transform=lambda x: x[0:2]),
        'acc_5': im.TopKCategoricalAccuracy(k=5, output_transform=lambda x: x[0:2]),
        'ce_loss': im.Loss(nn.CrossEntropyLoss(), output_transform=lambda x: x[0:2]),
        'total_loss': im.Loss(nn.CrossEntropyLoss(), output_transform=lambda x: x[0:2]),
    }
    eval_ae_metrics = {
        'acc_1': im.Accuracy(output_transform=lambda x: (x[1], x[5])),
        'acc_5': im.TopKCategoricalAccuracy(k=5, output_transform=lambda x: (x[1], x[5])),
        'ce_loss': eval_ae_loss_metric[0],
        'l1_loss': eval_ae_loss_metric[1],
        'total_loss': eval_ae_total_loss,
    }
    eval_gsnn_metrics = {
        'acc_1': im.Accuracy(output_transform=lambda x: (x[-1], x[-2])),
        'acc_5': im.TopKCategoricalAccuracy(k=5, output_transform=lambda x: (x[-1], x[-2])),
        'ce_loss': eval_gsnn_loss_metric[0],
        'kld_loss': eval_gsnn_loss_metric[1],
        'total_loss': eval_gsnn_total_loss,
    }
    eval_vae_metrics = {
        'acc_1': im.Accuracy(output_transform=lambda x: (x[-1], x[-2])),
        'acc_5': im.TopKCategoricalAccuracy(k=5, output_transform=lambda x: (x[-1], x[-2])),
        'ce_loss': eval_vae_loss_metric[0],
        'l1_loss': eval_vae_loss_metric[1],
        'kld_loss': eval_vae_loss_metric[2],
        'total_loss': eval_vae_total_loss,
    }
def more_metrics(self, metrics_: OrderedDict):
    metrics_['loss'] = metrics.Loss(nn.CrossEntropyLoss())
    metrics_['accuracy'] = metrics.Accuracy()
    metrics_['recall'] = metrics.Recall()
    metrics_['precision'] = metrics.Precision()
lr = Pipeline([("scaler", StandardScaler()), ("lr", LogisticRegression(max_iter=10000))]) lr.fit(z_tr, y_tr) acc = lr.score(z_ts, y_ts) return { "y": y, "loss": l, "y_pred": y_probs, "y_probs": y_probs, "lr_acc": acc } eval_engine = Engine(batch_eval) metrics.Accuracy().attach(eval_engine, "accuracy") metrics.Average().attach(train_engine, "average_loss") metrics.Average(output_transform=lambda x: x["lr_acc"]).attach( eval_engine, "lr_acc") metrics.Average(output_transform=lambda x: x["loss"]).attach( eval_engine, "average_loss") @eval_engine.on(Events.EPOCH_COMPLETED) def log_tboard(engine): tb.add_scalar( "train/loss", train_engine.state.metrics["average_loss"], train_engine.state.epoch, ) tb.add_scalar( "eval/loss",
def create_sla_trainer(model,
                       transform,
                       optimizer=None,
                       with_large_loss=False,
                       name='train',
                       device=None):
    if device is not None:
        model.to(device)
    is_train = optimizer is not None

    def _update(engine, batch):
        model.train(is_train)
        with torch.set_grad_enabled(is_train):
            images, labels = convert_tensor(batch, device=device)
            batch_size = images.shape[0]

            images = transform(model, images, labels)
            n = images.shape[0] // batch_size

            preds = model(images)
            labels = torch.stack([labels * n + i for i in range(n)], 1).view(-1)
            loss = F.cross_entropy(preds, labels)
            if with_large_loss:
                loss = loss * n

            single_preds = preds[::n, ::n]
            single_labels = labels[::n] // n

            agg_preds = 0
            for i in range(n):
                agg_preds = agg_preds + preds[i::n, i::n] / n

            if is_train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        return {
            'loss': loss.item(),
            'preds': preds,
            'labels': labels,
            'single_preds': single_preds,
            'single_labels': single_labels,
            'agg_preds': agg_preds,
        }

    engine = Engine(_update)
    engine.name = name

    metrics.Average(lambda o: o['loss']).attach(engine, 'total_loss')
    metrics.Accuracy(lambda o: (o['preds'], o['labels'])).attach(engine, 'total_acc')
    metrics.Average(lambda o: F.cross_entropy(o['single_preds'], o['single_labels'])).attach(engine, 'single_loss')
    metrics.Accuracy(lambda o: (o['single_preds'], o['single_labels'])).attach(engine, 'single_acc')
    metrics.Average(lambda o: F.cross_entropy(o['agg_preds'], o['single_labels'])).attach(engine, 'agg_loss')
    metrics.Accuracy(lambda o: (o['agg_preds'], o['single_labels'])).attach(engine, 'agg_acc')

    return engine
def create_sla_sd_trainer(model,
                          transform,
                          optimizer=None,
                          T=1.0,
                          with_large_loss=False,
                          name='train',
                          device=None):
    if device is not None:
        model.to(device)
    is_train = optimizer is not None

    def _update(engine, batch):
        model.train(is_train)
        with torch.set_grad_enabled(is_train):
            images, single_labels = convert_tensor(batch, device=device)
            batch_size = images.shape[0]

            images = transform(model, images, single_labels)
            n = images.shape[0] // batch_size

            joint_preds, single_preds = model(images, None)
            single_preds = single_preds[::n]
            joint_labels = torch.stack(
                [single_labels * n + i for i in range(n)], 1).view(-1)

            joint_loss = F.cross_entropy(joint_preds, joint_labels)
            single_loss = F.cross_entropy(single_preds, single_labels)
            if with_large_loss:
                joint_loss = joint_loss * n

            agg_preds = 0
            for i in range(n):
                agg_preds = agg_preds + joint_preds[i::n, i::n] / n

            distillation_loss = F.kl_div(F.log_softmax(single_preds / T, 1),
                                         F.softmax(agg_preds.detach() / T, 1),
                                         reduction='batchmean')
            loss = joint_loss + single_loss + distillation_loss.mul(T ** 2)

            if is_train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        return {
            'loss': loss.item(),
            'preds': joint_preds,
            'labels': joint_labels,
            'single_preds': single_preds,
            'single_labels': single_labels,
            'agg_preds': agg_preds,
        }

    engine = Engine(_update)
    engine.name = name

    metrics.Average(lambda o: o['loss']).attach(engine, 'total_loss')
    metrics.Accuracy(lambda o: (o['preds'], o['labels'])).attach(engine, 'total_acc')
    metrics.Average(lambda o: F.cross_entropy(o['single_preds'], o['single_labels'])).attach(engine, 'single_loss')
    metrics.Accuracy(lambda o: (o['single_preds'], o['single_labels'])).attach(engine, 'single_acc')
    metrics.Average(lambda o: F.cross_entropy(o['agg_preds'], o['single_labels'])).attach(engine, 'agg_loss')
    metrics.Accuracy(lambda o: (o['agg_preds'], o['single_labels'])).attach(engine, 'agg_acc')

    return engine
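# Hedged sketch of a `transform` callable compatible with the two SLA trainers
# above (added for illustration; the original transform is not shown in this
# excerpt). It stacks the four 90-degree rotations of each image, interleaved
# per sample, so that images.shape[0] becomes 4 * batch_size and n == 4 in the
# update step.
import torch

def rotation_transform(model, images, labels):
    # images: (B, C, H, W) -> (4 * B, C, H, W); the 4 rotations of sample j are
    # rows 4*j .. 4*j+3, matching the joint-label layout built by the trainers.
    rotated = torch.stack(
        [torch.rot90(images, k, dims=(2, 3)) for k in range(4)], dim=1)
    return rotated.view(-1, *images.shape[1:])

# e.g. (assuming a suitable model and data loader):
# trainer = create_sla_trainer(model, rotation_transform,
#                              optimizer=torch.optim.SGD(model.parameters(), lr=0.1),
#                              device=torch.device('cuda'))
# trainer.run(train_loader, max_epochs=100)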
#     tt.Normalize(0.0, 1.0)
# ])
transforms = None

DSet = NpyClfDatasets(CCSN, MSS, CHIRP, DSIR, transform=transforms)
train_l, val_l = DSet.train_test_split(random_state=24, test_size=0.25)
t_DataLoader = tud.DataLoader(DSet, sampler=train_l, batch_size=10, pin_memory=True)
v_DataLoader = tud.DataLoader(DSet, sampler=val_l, batch_size=10, pin_memory=True)

#########################
DESC = "Epoch {} - loss {:.2f}"
PBAR = tqdm(initial=0, leave=False, total=len(t_DataLoader), desc=DESC.format(0, 0))

CLF = CNN_ONE(idx=50)
LFN = tn.CrossEntropyLoss()
OPM = to.Adam(CLF.parameters(), lr=1e-3)

VAL_METRICS = {
    'loss': im.Loss(LFN),
    'acc': im.Accuracy(),
    'recall': im.Recall(),
    'precision': im.Precision(),
    'cfm': im.ConfusionMatrix(3),
}

L_TRAIN = []
L_EVAL = []
L_ACC = []
L_PRE = []
L_REC = []
L_CFM = []

#########################
def train_step(engine, batch):
    CLF.train()
    OPM.zero_grad()
    x, y = batch['payload'], batch['target']
_optimizer = init_optimizer(config)
optimizer = _optimizer(model.parameters(), lr=config["learning_rate"])

trainer = engine.create_supervised_trainer(
    model=model,
    optimizer=optimizer,
    loss_fn=loss_fn,
    device=device,
    non_blocking=True,
)
evaluator = engine.create_supervised_evaluator(
    model=model,
    metrics={
        "Loss": metrics.Loss(nn.CrossEntropyLoss()),
        "accuracy@0.3": metrics.Accuracy(thresholded_transform(0.3)),
        "accuracy@0.5": metrics.Accuracy(thresholded_transform(0.5)),
        "IOU": metrics.IoU(metrics.ConfusionMatrix(num_classes=config["n_classes"])),
        "mIOU": metrics.mIoU(metrics.ConfusionMatrix(num_classes=config["n_classes"])),
        # "FPS": metrics.Frequency(output_transform=lambda x: x[0]),
    },
    device=device,
    non_blocking=True,
    output_transform=lambda x, y, y_pred: (torch.sigmoid(y_pred["out"]), y),
)

writer = tensorboard.SummaryWriter(log_dir=f'summary/{config["model_tag"]}')
attach_metric_logger(evaluator, eval_loader, 'val', writer=writer)
attach_training_logger(trainer, writer=writer, log_interval=1)
attach_model_checkpoint(trainer, {config["model_tag"]: model.module}, args.name)
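# Hedged sketch of the `thresholded_transform` helper referenced above and in
# the next snippet (its real definition is not part of this excerpt). Since the
# evaluator's output_transform already yields (sigmoid probabilities, targets),
# a plausible implementation binarises the probabilities at the given threshold
# so that ignite.metrics.Accuracy receives 0/1 predictions.
def thresholded_transform(threshold=0.5):
    def transform(output):
        y_pred, y = output
        return (y_pred > threshold).long(), y
    return transform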
def train():
    # parse command line arguments, read the configuration file and set up logging
    args = parse_args()
    config = read_config()
    try:
        if args.overwrite:
            shutil.rmtree(f"./logs/{args.name}", ignore_errors=True)
        os.mkdir(f"./logs/{args.name}")
    except FileExistsError:
        print(f"log folder {args.name} already exists.")
    init_logging(log_path=f"./logs/{args.name}")

    # determine whether to train the model on cuda or cpu
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    logger.info(f"running training on {device}")
    device += f':{args.main_cuda}'

    # prepare training and validation datasets
    logger.info('creating dataset and data loaders')
    dataset = args.dataset
    train_dataset = AerialDataset("train", dataset,
                                  config[dataset]["train"]["image_path"],
                                  config[dataset]["train"]["mask_path"])
    val_dataset = AerialDataset("val", dataset,
                                config[dataset]["val"]["image_path"],
                                config[dataset]["val"]["mask_path"])
    train_loader, train_metrics_loader, val_metrics_loader = create_data_loaders(
        train_dataset=train_dataset,
        val_dataset=val_dataset,
        num_workers=config["num_workers"],
        batch_size=config["batchsize"],
    )

    # create the model
    logger.info(
        f'creating BiSeNetV2 and optimizer with initial lr of {config["learning_rate"]}')
    model = BiSeNetV2(config["n_classes"])
    model = nn.DataParallel(
        model, device_ids=[x for x in range(args.main_cuda, 4)]).to(device)

    # initiate loss function and optimizer
    optimizer_fn = init_optimizer(config)
    optimizer = optimizer_fn(model.parameters(), lr=config["learning_rate"])

    logger.info('creating trainer and evaluator engines')
    _loss_fn = init_loss(config["loss_fn"])
    loss_fn = LossWithAux(_loss_fn)

    # create trainer and evaluator with ignite.engine
    trainer = engine.create_supervised_trainer(
        model=model,
        optimizer=optimizer,
        loss_fn=loss_fn,
        device=device,
        non_blocking=True,
    )
    evaluator = engine.create_supervised_evaluator(
        model=model,
        metrics={
            'loss': metrics.Loss(nn.CrossEntropyLoss()),
            "accuracy@0.3": metrics.Accuracy(thresholded_transform(0.3)),
            "accuracy@0.5": metrics.Accuracy(thresholded_transform(0.5)),
            "IOU": metrics.IoU(
                metrics.ConfusionMatrix(num_classes=config["n_classes"])),
            "mIOU": metrics.mIoU(
                metrics.ConfusionMatrix(num_classes=config["n_classes"])),
        },
        device=device,
        non_blocking=True,
        output_transform=lambda x, y, y_pred: (torch.sigmoid(y_pred["out"]), y),
    )

    # attach event listeners for post-processing after each iteration and epoch
    logger.info(f'creating summary writer with tag {config["model_tag"]}')
    writer = tensorboard.SummaryWriter(log_dir=f'logs/{config["model_tag"]}')

    # logger.info('attaching lr scheduler')
    # lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
    # attach_lr_scheduler(trainer, lr_scheduler, writer)

    logger.info('attaching event driven calls')
    attach_model_checkpoint(trainer, {config["model_tag"]: model.module}, args.name)
    attach_training_logger(trainer, writer=writer)
    attach_metric_logger(trainer, evaluator, 'train', train_metrics_loader, writer)
    attach_metric_logger(trainer, evaluator, 'val', val_metrics_loader, writer)

    # start training (evaluation is included too)
    logger.info('training...')
    trainer.run(train_loader, max_epochs=config["epochs"])