def __init__(self, args, crition: nn.CrossEntropyLoss):
    self.args = args
    self.model_name = args.net
    self.config = self._parse_args(args.config)
    net_module = importlib.import_module(f"net.{self.model_name}")
    self.model_class = getattr(net_module, self.model_name)
    self.model: torch.nn.Module = self.model_class(*self._parse_model_args())
    self.numclass = self.config['numclass']
    self.save_path = self.config['save_path']
    self.batch_size = self.config['batch_size']
    self.crition = crition
    self.metricer = Metrics(self.numclass)
    self.test_dataloader = get_data_loader(
        self.config['test_data_path'],
        self.config['test_annot_path'],
        self.numclass,
        img_size=self.config['ori_size'],
        batch_size=8,
        name=self.config['dataset_name'],
        mode='test',
        return_name=True,
    )
    # Load the checkpoint on CPU first so this also works on machines
    # without CUDA, then move the model to the GPU when one is available.
    self.model.load_state_dict(
        torch.load(self.save_path, map_location="cpu"), strict=False)
    if torch.cuda.is_available():
        self.model = self.model.cuda()
def valid(model, valid_dataloader, total_batch):
    model.eval()

    # Metrics logger initialization
    metrics = Metrics(['recall', 'specificity', 'precision', 'F1', 'F2',
                       'ACC_overall', 'IoU_poly', 'IoU_bg', 'IoU_mean'])

    with torch.no_grad():
        bar = tqdm(enumerate(valid_dataloader), total=total_batch)
        for i, data in bar:
            img, gt = data['image'], data['label']

            if opt.use_gpu:
                img = img.cuda()
                gt = gt.cuda()

            output = model(img)
            _recall, _specificity, _precision, _F1, _F2, \
                _ACC_overall, _IoU_poly, _IoU_bg, _IoU_mean = evaluate(output, gt)

            metrics.update(recall=_recall, specificity=_specificity,
                           precision=_precision, F1=_F1, F2=_F2,
                           ACC_overall=_ACC_overall, IoU_poly=_IoU_poly,
                           IoU_bg=_IoU_bg, IoU_mean=_IoU_mean)

    metrics_result = metrics.mean(total_batch)

    return metrics_result
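# Hypothetical call site for valid() (variable names assumed): run it after
# each training epoch and print the averaged metrics dict it returns.
metrics_result = valid(model, valid_dataloader, total_batch=len(valid_dataloader))
print(', '.join(f'{k}: {v:.4f}' for k, v in metrics_result.items()))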
def run_liver(istrain=False,
              model_name='unet',
              modelcheckpoint='cache/liver/model/unet.hdf5',
              model_pretrain='cache/liver/model/weight_unet_gen_tf.h5',
              batch_size=1,
              nb_epoch=500,
              is_datagen=False,
              channel=5):
    reader = DataSetVolumn()
    seg = SegmentationBatch(model_name, modelcheckpoint)
    if istrain:
        current_dir = os.path.dirname(os.path.realpath(__file__))
        weights_path = os.path.expanduser(
            os.path.join(current_dir, model_pretrain)) if model_pretrain else None
        seg.train(reader,
                  weights_path=weights_path,
                  batch_size=batch_size,
                  nb_epoch=nb_epoch,
                  is_datagen=is_datagen,
                  channel=channel)

    testdata = reader.load_testdata_channel(channel)
    metrics_testdata = []
    for imagefile, data in testdata.items():  # .iteritems() is Python 2 only
        X_test, y_test = data
        predicts = seg.predict(X_test, batch_size=batch_size)
        case_metrics = Metrics.all(y_test, predicts)  # compute once, reuse below
        pprint(case_metrics)
        metrics_testdata.append((imagefile, case_metrics))

    result = {
        key: sum(metrics[key] for _, metrics in metrics_testdata) / len(metrics_testdata)
        for key in ('acc', 'dice', 'jacc', 'sensitivity', 'specificity')
    }
    print('the average metrics, case by case')
    pprint(result)
    # returns the last test case's data and predictions
    return (X_test, y_test, predicts)
def __init__(self, env: UnityEnvironment, state_space: int, action_space: int,
             actor_network_builder: Callable[[int, int], nn.Module],
             critic_network_builder: Callable[[int, int], nn.Module],
             gamma: float, batch_size: int, target_update: int,
             use_soft_updates: bool, policy_update: int,
             num_policy_updates: int, min_samples_in_memory: int,
             memory_capacity: int, device: str, tb_logger: SummaryWriter):
    self._device = torch.device(device)
    self._dtype = torch.float

    self._actor = actor_network_builder(state_space, action_space).to(
        device=self._device, dtype=self._dtype).train()
    self._actor_target = actor_network_builder(state_space, action_space).to(
        device=self._device, dtype=self._dtype)
    self._actor_target.load_state_dict(self._actor.state_dict())
    self._actor_target.eval()

    self._critic = critic_network_builder(state_space, action_space).to(
        device=self._device, dtype=self._dtype).train()
    self._critic_target = critic_network_builder(state_space, action_space).to(
        device=self._device, dtype=self._dtype)
    self._critic_target.load_state_dict(self._critic.state_dict())
    self._critic_target.eval()

    self._actor_optimizer = optim.Adam(self._actor.parameters(), lr=1e-4)
    self._critic_optimizer = optim.Adam(self._critic.parameters(), lr=1e-4,
                                        weight_decay=0)

    self._env = env
    self._state_space = state_space
    self._action_space = action_space
    self._brain_name = self._env.brain_names[0]
    self._memory = ReplayMemory(memory_capacity)
    self._metrics = Metrics()
    self._gamma = gamma
    self._batch_size = batch_size
    self._target_update = target_update
    self._use_soft_update = use_soft_updates
    self._policy_update = policy_update
    self._num_policy_updates = num_policy_updates
    self._min_samples_in_memory = min_samples_in_memory
    self._noise = OrnsteinUhlenbeckProcess(dims=self._action_space,
                                           mu_start=1., mu_final=0.,
                                           theta=0.015, sigma=0.2, dt=0.005)
    self._tb = tb_logger
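# A sketch of a network builder matching the Callable[[int, int], nn.Module]
# signature this constructor expects (illustrative only; not the project's
# actual architecture).
import torch.nn as nn

def build_actor(state_space: int, action_space: int) -> nn.Module:
    return nn.Sequential(
        nn.Linear(state_space, 128), nn.ReLU(),
        nn.Linear(128, 128), nn.ReLU(),
        nn.Linear(128, action_space), nn.Tanh(),  # squash actions into [-1, 1]
    )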
def get_metrics(self, predictions, y, ix2pred, pred2ix):
    # predictions = self.model.predict_classes(x)
    y_ = np.argmax(a=y, axis=-1)
    print(np.mean(np.equal(predictions, y_)))  # plain accuracy
    Metrics.report(y_, predictions, list(ix2pred), list(pred2ix), True)
    print(Metrics.pr_rc_fscore_sup(y_, predictions, "micro", True))
    print(Metrics.pr_rc_fscore_sup(y_, predictions, "macro", True))
def test():
    print('loading data......')
    test_data = getattr(datasets, opt.dataset)(opt.root, opt.test_data_dir,
                                               mode='test')
    test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False,
                                 num_workers=opt.num_workers)
    total_batch = len(test_data)  # batch_size is 1, so one batch per sample

    model = generate_model(opt)
    model.eval()

    # Metrics logger initialization
    metrics = Metrics(['recall', 'specificity', 'precision', 'F1', 'F2',
                       'ACC_overall', 'IoU_poly', 'IoU_bg', 'IoU_mean'])

    with torch.no_grad():
        bar = tqdm(enumerate(test_dataloader), total=total_batch)
        for i, data in bar:
            img, gt = data['image'], data['label']

            if opt.use_gpu:
                img = img.cuda()
                gt = gt.cuda()

            output = model(img)
            _recall, _specificity, _precision, _F1, _F2, \
                _ACC_overall, _IoU_poly, _IoU_bg, _IoU_mean = evaluate(output, gt)

            metrics.update(recall=_recall, specificity=_specificity,
                           precision=_precision, F1=_F1, F2=_F2,
                           ACC_overall=_ACC_overall, IoU_poly=_IoU_poly,
                           IoU_bg=_IoU_bg, IoU_mean=_IoU_mean)

    metrics_result = metrics.mean(total_batch)

    print("Test Result:")
    print('recall: %.4f, specificity: %.4f, precision: %.4f, F1: %.4f, F2: %.4f, '
          'ACC_overall: %.4f, IoU_poly: %.4f, IoU_bg: %.4f, IoU_mean: %.4f'
          % (metrics_result['recall'], metrics_result['specificity'],
             metrics_result['precision'], metrics_result['F1'],
             metrics_result['F2'], metrics_result['ACC_overall'],
             metrics_result['IoU_poly'], metrics_result['IoU_bg'],
             metrics_result['IoU_mean']))
def __init__(self, args, crition: nn.CrossEntropyLoss, optimzer):
    seed = random.randint(0, 1000)
    random.seed(seed)
    torch.manual_seed(seed)
    cd.manual_seed(seed)
    cd.manual_seed_all(seed)

    self.args = args
    self.model_name = args.net
    self.config = self._parse_args(args.config)
    net_module = importlib.import_module(f"net.{self.model_name}")
    self.model_class = getattr(net_module, self.model_name)
    self.model = self.model_class(**self._parse_model_args())
    self.crition = crition
    self.base_lr = self.config.get("lr", 0.01)
    self.optimizer = self._get_optimizer(optimzer)
    self.iters = self.config.get("iter", 5000)
    self.power = self.config.get("power", 0.9)
    self.numclass = self.config['numclass']
    self.batch_size = self.config['batch_size']
    self.print_freq = self.config['print_freq']
    self.save_freq = self.config['save_freq']
    self.gpu = self.config.get('gpus')
    print(f"gpus: {self.gpu}")
    if self.gpu:
        self.gpu = [self.gpu] if isinstance(self.gpu, int) else list(self.gpu)
    else:
        self.device = torch.device("cpu")

    self.train_dataloader = get_data_loader(
        self.config['train_data_path'],
        self.config['train_annot_path'],
        self.numclass,
        img_size=self.config['img_size'],
        batch_size=self.batch_size,
        name=self.config['dataset_name'])
    self.val_dataloader = get_data_loader(
        self.config['val_data_path'],
        self.config['val_annot_path'],
        self.numclass,
        img_size=self.config['img_size'],
        batch_size=self.batch_size,
        name=self.config['dataset_name'],
        mode='eval')

    self.metricer = Metrics(self.numclass)
    logdir = self._get_log_dir()
    self.writer = SummaryWriter(log_dir=logdir)

    if self.gpu:
        print(torch.cuda.device_count())
        self.model = nn.DataParallel(self.model,
                                     device_ids=self.gpu).cuda(self.gpu[0])
        # self.crition = self.crition.cuda(self.gpu[0])

    # Trade speed for reproducibility: disable cuDNN autotuning and
    # force deterministic kernels.
    cudnn.benchmark = False
    cudnn.deterministic = True
def __init__(self, options, model: nn.Module, dataset_info, append2metric=None):
    """
    Basic federated-learning server; the model here is shared across all clients.
    :param options: parameter configuration
    :param model: the model
    :param dataset_info: dataset parameters
    :param append2metric: custom metric to append
    """
    self.options = options
    self.model, self.flops, self.params_num, self.model_bytes = \
        self.setup_model(model=model)
    self.device = options['device']

    # Record the training data of all clients
    client_info = self.setup_clients(dataset_info=dataset_info)
    # note: an OrderedDict can be unpacked this way
    self.train_client_ids, self.train_clients = (
        client_info['train_clients'][0],
        list(client_info['train_clients'][1].values()))
    self.test_client_ids, self.test_clients = (
        client_info['test_clients'][0],
        list(client_info['test_clients'][1].values()))
    self.num_train_clients = len(self.train_client_ids)
    self.num_test_clients = len(self.test_client_ids)

    self.num_epochs = options['num_epochs']
    self.num_rounds = options['num_rounds']
    self.clients_per_round = options['clients_per_round']
    self.save_every_round = options['save_every']
    self.eval_on_train_every_round = options['eval_on_train_every']
    self.eval_on_test_every_round = options['eval_on_test_every']
    self.eval_on_validation_every_round = options['eval_on_validation_every']

    # Uses the client API
    self.global_model = model
    self.global_model.train()
    self.name = '_'.join([
        '', f'wn[{options["clients_per_round"]}]',
        f'num_train[{self.num_train_clients}]',
        f'num_test[{self.num_test_clients}]'
    ])
    self.metrics = Metrics(options=options,
                           name=self.name,
                           append2suffix=append2metric,
                           result_prefix=options['result_prefix'])
    self.quiet = options['quiet']
def validate(loader, num_classes, device, net, scheduler, criterion):
    num_samples = 0
    running_loss = 0

    metrics = Metrics(range(num_classes))

    net.eval()
    with torch.no_grad():  # no gradients needed during validation
        for images, masks in tqdm.tqdm(loader):
            images = images.to(device, dtype=torch.float)
            masks = masks.to(device)

            num_samples += int(images.size(0))

            outputs = net(images)
            loss = criterion(outputs, masks)
            running_loss += loss.item()

            for mask, output in zip(masks, outputs):
                metrics.add(mask, output)

    assert num_samples > 0, "dataset contains validation images and labels"

    scheduler.step(metrics.get_miou())  # update learning rate

    return {
        "loss": running_loss / num_samples,
        "miou": metrics.get_miou(),
        "fg_iou": metrics.get_fg_iou(),
        "mcc": metrics.get_mcc(),
    }
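# Hypothetical wiring (names assumed): validate() passes the mIoU to
# scheduler.step(), so a ReduceLROnPlateau scheduler in 'max' mode is the
# natural companion.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", patience=3)
for epoch in range(num_epochs):
    train_hist = train(train_loader, num_classes, device, net, optimizer, criterion)
    val_hist = validate(val_loader, num_classes, device, net, scheduler, criterion)
    print(f"epoch {epoch}: val loss {val_hist['loss']:.4f}, miou {val_hist['miou']:.4f}")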
def train(df, model, optimizer, logger, num_epochs, batch_size):
    """Train SAKT model.

    Arguments:
        df (pandas DataFrame): output by prepare_data.py
        model (torch Module)
        optimizer (torch optimizer)
        logger: wrapper for TensorboardX logger
        num_epochs (int): number of epochs to train for
        batch_size (int)
    """
    train_data, val_data = get_data(df)

    criterion = nn.BCEWithLogitsLoss()
    metrics = Metrics()
    step = 0

    for epoch in range(num_epochs):
        train_batches = prepare_batches(train_data, batch_size)
        val_batches = prepare_batches(val_data, batch_size)

        # Training
        for inputs, item_ids, labels in train_batches:
            inputs = inputs.cuda()
            preds = model(inputs)
            loss = compute_loss(preds, item_ids.cuda(), labels.cuda(), criterion)
            train_auc = compute_auc(preds.detach().cpu(), item_ids, labels)

            model.zero_grad()
            loss.backward()
            optimizer.step()
            step += 1
            metrics.store({'loss/train': loss.item()})
            metrics.store({'auc/train': train_auc})

            # Logging
            if step % 20 == 0:
                logger.log_scalars(metrics.average(), step)
                weights = {"weight/" + name: param
                           for name, param in model.named_parameters()}
                grads = {"grad/" + name: param.grad
                         for name, param in model.named_parameters()
                         if param.grad is not None}
                logger.log_histograms(weights, step)
                logger.log_histograms(grads, step)

        # Validation
        model.eval()
        for inputs, item_ids, labels in val_batches:
            inputs = inputs.cuda()
            with torch.no_grad():
                preds = model(inputs)
            val_auc = compute_auc(preds.cpu(), item_ids, labels)
            metrics.store({'auc/val': val_auc})
        model.train()
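# A minimal Metrics sketch consistent with how train() uses it here and in
# the FFW trainer below (an assumption; the project's class may track more
# state): store() buffers values, average() returns running means and resets.
from collections import defaultdict

class Metrics:
    def __init__(self):
        self._buffers = defaultdict(list)

    def store(self, new_metrics: dict):
        for name, value in new_metrics.items():
            self._buffers[name].append(value)

    def average(self) -> dict:
        means = {name: sum(values) / len(values)
                 for name, values in self._buffers.items() if values}
        self._buffers.clear()
        return means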
def train(X_train, X_val, model, optimizer, logger, num_epochs, batch_size):
    """Train FFW model.

    Arguments:
        X_train, X_val (sparse matrices): output by encode_ffw.py
        model (torch Module)
        optimizer (torch optimizer)
        logger: wrapper for TensorboardX logger
        num_epochs (int): number of epochs to train for
        batch_size (int)
    """
    criterion = nn.BCEWithLogitsLoss()
    metrics = Metrics()
    train_idxs = np.arange(X_train.shape[0])
    val_idxs = np.arange(X_val.shape[0])
    step = 0

    for epoch in tqdm(range(num_epochs)):
        shuffle(train_idxs)
        shuffle(val_idxs)

        # Training
        for k in range(0, len(train_idxs), batch_size):
            inputs, item_ids, labels = get_tensors(X_train[train_idxs[k:k + batch_size]])
            inputs = inputs.to(device=args.device)
            preds = model(inputs)
            relevant_preds = preds[torch.arange(preds.shape[0]),
                                   item_ids.to(device=args.device)]
            loss = criterion(relevant_preds, labels.to(device=args.device))
            train_auc = compute_auc(preds.detach().cpu(), item_ids, labels)

            model.zero_grad()
            loss.backward()
            optimizer.step()
            step += 1
            metrics.store({"loss/train": loss.item()})
            metrics.store({"auc/train": train_auc})

            # Logging
            if step % 20 == 0:
                logger.log_scalars(metrics.average(), step * batch_size)

        # Validation
        model.eval()
        for k in range(0, len(val_idxs), batch_size):
            inputs, item_ids, labels = get_tensors(X_val[val_idxs[k:k + batch_size]])
            inputs = inputs.to(device=args.device)
            with torch.no_grad():
                preds = model(inputs)
            val_auc = compute_auc(preds.cpu(), item_ids, labels)
            metrics.store({"auc/val": val_auc})
        model.train()
def eval():
    args = DefaultConfig()
    print('#' * 20, 'Start Evaluation', '#' * 20)

    for dataset in tqdm.tqdm(args.testdataset, total=len(args.testdataset),
                             position=0,
                             bar_format='{desc:<30}{percentage:3.0f}%|{bar:50}{r_bar}'):
        # Raw strings keep the backslashes in the Windows paths from being
        # treated as escape sequences.
        pred_path = r'E:\dataset\data\TestDataset/{}/output/'.format(dataset)
        gt_path = r'E:\dataset\data\TestDataset/{}/masks/'.format(dataset)

        preds = os.listdir(pred_path)
        gts = os.listdir(gt_path)
        total_batch = len(preds)

        # Metrics logger initialization
        metrics = Metrics(['recall', 'specificity', 'precision', 'F1', 'F2',
                           'ACC_overall', 'IoU_poly', 'IoU_bg', 'IoU_mean',
                           'Dice'])

        for i, sample in tqdm.tqdm(enumerate(zip(preds, gts)),
                                   desc=dataset + ' - Evaluation',
                                   total=len(preds), position=1, leave=False,
                                   bar_format='{desc:<30}{percentage:3.0f}%|{bar:50}{r_bar}'):
            pred, gt = sample
            assert os.path.splitext(pred)[0] == os.path.splitext(gt)[0]

            pred_mask = np.array(Image.open(os.path.join(pred_path, pred)))
            gt_mask = np.array(Image.open(os.path.join(gt_path, gt)))
            if len(pred_mask.shape) != 2:
                pred_mask = pred_mask[:, :, 0]
            if len(gt_mask.shape) != 2:
                gt_mask = gt_mask[:, :, 0]
            assert pred_mask.shape == gt_mask.shape

            gt_mask = gt_mask.astype(np.float64) / 255
            pred_mask = pred_mask.astype(np.float64) / 255
            gt_mask = torch.from_numpy(gt_mask)
            pred_mask = torch.from_numpy(pred_mask)

            _recall, _specificity, _precision, _F1, _F2, \
                _ACC_overall, _IoU_poly, _IoU_bg, _IoU_mean, _Dice = \
                evaluate(pred_mask, gt_mask, 0.5)

            metrics.update(recall=_recall, specificity=_specificity,
                           precision=_precision, F1=_F1, F2=_F2,
                           ACC_overall=_ACC_overall, IoU_poly=_IoU_poly,
                           IoU_bg=_IoU_bg, IoU_mean=_IoU_mean, Dice=_Dice)

        metrics_result = metrics.mean(total_batch)

        print("Test Result:")
        print('recall: %.4f, specificity: %.4f, precision: %.4f, F1: %.4f, F2: %.4f, '
              'ACC_overall: %.4f, IoU_poly: %.4f, IoU_bg: %.4f, IoU_mean: %.4f, Dice: %.4f'
              % (metrics_result['recall'], metrics_result['specificity'],
                 metrics_result['precision'], metrics_result['F1'],
                 metrics_result['F2'], metrics_result['ACC_overall'],
                 metrics_result['IoU_poly'], metrics_result['IoU_bg'],
                 metrics_result['IoU_mean'], metrics_result['Dice']))
def __init__(self, filename):
    self.experiment_name = filename
    self.generic_model_class = False
    self.metrics_test = Metrics()
    self.pp_data = PreprocessData()
    self.log = Log(filename, "txt")
    self.local_device_protos = None
    self.metrics_based_sample = False
    self.total_gpus = dn.TOTAL_GPUS
    self.use_custom_metrics = True
    self.use_valid_set_for_train = True
    self.valid_split_from_train_set = 0
    self.imbalanced_classes = False
    self.iteraction = 1
    self.get_available_gpus()
def run_skin(istrain, model_name, modelcheckpoint, model_pretrain=None,
             batch_size=32, nb_epoch=200, is_datagen=False):
    print('-' * 30)
    print('Loading data...')
    print('-' * 30)
    X_train, X_test, y_train, y_test = SkinData.load_from_npy(
        images_npy='cache/skin/datasets/images_224_224_tf.npy',
        masks_npy='cache/skin/datasets/masks_224_224_tf.npy')

    seg = Segmentation(model_name, modelcheckpoint)
    if istrain:
        seg.train(X_train, y_train, (X_test, y_test),
                  weights_path=Segmentation.absolute_path(model_pretrain),
                  batch_size=batch_size,
                  nb_epoch=nb_epoch,
                  is_datagen=is_datagen)

    predicts = seg.predict(X_test, batch_size=batch_size)
    pprint(Metrics.all(y_test, predicts))
    return (X_test, y_test, predicts)
def test_rmsle(self):
    rmsle = Metrics().rmsle
    y_true = np.array([0, np.exp(2) - 1])
    y_pred = np.array([0, np.exp(1) - 1])
    result = rmsle(y_true, y_pred)
    self.assertEqual(result, np.sqrt(.5))
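# A minimal rmsle consistent with the assertion above (an assumption; the
# real Metrics.rmsle may be implemented differently):
import numpy as np

def rmsle(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Root mean squared logarithmic error."""
    return float(np.sqrt(np.mean((np.log1p(y_pred) - np.log1p(y_true)) ** 2)))

# Worked through: log1p maps y_true to [0, 2] and y_pred to [0, 1], the
# squared differences are [0, 1], their mean is 0.5, and sqrt(0.5) is
# exactly what the test asserts.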
def _train_epoch(self, epoch):
    self.model.train()
    total_loss = 0
    cnt = 0
    metrics = Metrics()
    for idx, data in enumerate(self.train_loader, 0):
        images, masks = data
        images = images.to(self.device)
        masks = masks.to(self.device)
        outputs = self.model(images)

        # Metrics
        metrics.update_input(outputs, masks)
        seg_metrics = metrics.get_metrics(self.config["batch_metrics"])

        self.optimizer.zero_grad()
        loss = self.loss(outputs, masks)
        loss.backward()
        self.optimizer.step()

        # .item() detaches the loss; accumulating the raw tensor would keep
        # every iteration's graph alive and leak memory.
        total_loss += loss.item()
        cnt += 1

        if idx % self.log_per_iter == 0:
            if self.tb_writer:
                self.tb_writer.add_scalars(
                    "Training",
                    {"loss": total_loss / cnt, "lr": self.lr, **seg_metrics},
                    self.total_iters * epoch + idx)
                # Combine image, ground truth, and prediction into one grid
                # so the three stay synchronized in TensorBoard.
                img_rgb = images[0]
                gt_rgb = label2rgb(tensor2numpy(masks[0])).type_as(img_rgb)
                pre_rgb = label2rgb(tensor2numpy(tensor2mask(outputs[0]))).type_as(img_rgb)
                self.tb_writer.add_image(
                    "Training/train_images",
                    utils.make_grid([img_rgb, gt_rgb, pre_rgb]),
                    self.total_iters * epoch + idx)

            show_str = f"Training, epoch: {epoch}, Iter: {idx}, lr: {self.lr}, loss: {total_loss / cnt}"
            for key in seg_metrics:
                show_str += f", {key}: {seg_metrics[key]}"
            print(show_str)

    average_loss = total_loss / cnt
    return average_loss
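# Plausible shapes for the helpers used above (sketches; the repo's actual
# tensor2mask/tensor2numpy implementations may differ):
import torch

def tensor2mask(logits: torch.Tensor) -> torch.Tensor:
    # collapse the class dimension of a CxHxW logit map to an HxW label map
    return logits.argmax(dim=0)

def tensor2numpy(t: torch.Tensor):
    return t.detach().cpu().numpy()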
def test_epoch(
    model: nn.Module,
    loss_func: nn.Module,
    dataset,
    epoch: int,
    device: torch.device,
    loggr=None,
) -> Tuple[float, float, float]:
    """Testing of the model for one epoch.

    Parameters
    ----------
    model: nn.Module
        Model to be tested.
    loss_func: torch.nn.BCEWithLogitsLoss
        Loss function to be used.
    dataset: torch.utils.data.DataLoader
        Dataset to be used.
    epoch: int
        The current epoch.
    device: torch.device
        Device to be used.
    loggr: optional
        wandb logger to log metrics to. (default: None)
    """
    model.eval()
    pred_all = []
    loss_all = []
    gt_all = []
    for batch_step in tqdm(range(len(dataset)), desc="valid"):
        batch_x, batch_y = dataset[batch_step]
        batch_x = batch_x.to(device)
        batch_x = batch_x.permute(0, 2, 1)
        batch_y = batch_y.to(device)
        pred = model(batch_x)
        pred_all.append(pred.cpu().detach().numpy())
        loss = loss_func(pred, batch_y)
        loss_all.append(loss.cpu().detach().numpy())
        gt_all.extend(batch_y.cpu().detach().numpy())

    print("Test loss: ", np.mean(loss_all))
    pred_all = np.concatenate(pred_all, axis=0)
    _, mean_acc = Metrics(np.array(gt_all), pred_all)
    roc_score = roc_auc_score(np.array(gt_all), pred_all, average="macro")
    if loggr:
        loggr.log({"test_mean_accuracy": mean_acc, "epoch": epoch})
        loggr.log({"test_roc_score": roc_score, "epoch": epoch})
        loggr.log({"test_loss": np.mean(loss_all), "epoch": epoch})

    return np.mean(loss_all), mean_acc, roc_score
def __init__(self, options, model: nn.Module, read_dataset,
             append2metric=None, more_metric_to_train=None):
    """
    Basic federated-learning server; the model here is shared across all clients.
    :param options: parameter configuration
    :param model: the model
    :param read_dataset: dataset parameters
    :param append2metric: custom metric to append
    :param more_metric_to_train: extra columns for the training metrics
    """
    self.options = options
    self.model = self.setup_model(options=options, model=model)
    self.device = options['device']

    # Record the training data of all clients
    self.clients = self.setup_clients(dataset=read_dataset, model=model)
    self.num_epochs = options['num_epochs']
    self.num_rounds = options['num_rounds']
    self.clients_per_round = options['clients_per_round']
    self.save_every_round = options['save_every']
    self.eval_on_train_every_round = options['eval_on_train_every']
    self.eval_on_test_every_round = options['eval_on_test_every']
    self.eval_on_validation_every_round = options['eval_on_validation_every']
    self.num_clients = len(self.clients)

    # Uses the client API
    self.latest_model = self.clients[0].get_parameters_list()
    self.name = '_'.join(
        ['', f'wn{options["clients_per_round"]}', f'tn{self.num_clients}'])
    self.metrics = Metrics(clients=self.clients,
                           options=options,
                           name=self.name,
                           append2suffix=append2metric,
                           result_prefix=options['result_prefix'],
                           train_metric_extend_columns=more_metric_to_train)
    self.quiet = options['quiet']
def _val_epoch(self, epoch):
    self.model.eval()
    metrics = Metrics()
    for idx, data in enumerate(self.val_loader, 0):
        images, masks = data
        images = images.to(self.device)
        masks = masks.to(self.device)
        outputs = self.model(images).detach()

        # Per-batch metrics
        metrics.update_input(outputs, masks)
        seg_metrics = metrics.get_metrics(self.config["batch_metrics"])

        if idx % self.log_per_iter == 0:
            show_str = f"Validation, epoch: {epoch}, Iter: {idx}"
            for key in seg_metrics:
                show_str += f", {key}: {seg_metrics[key]}"
            print(show_str)

    global_metrics = metrics.get_metrics(self.config["global_metrics"])
    if self.tb_writer:
        self.tb_writer.add_scalars("Validation", {**global_metrics}, epoch)
    return global_metrics
def __init__(self, env: UnityEnvironment, state_space: int, action_space: int,
             network_builder: Callable[[int, int], nn.Module],
             use_double_dqn: bool, gamma: float, batch_size: int,
             target_update: int, use_soft_updates: bool, policy_update: int,
             min_samples_in_memory: int, memory_capacity: int,
             eps_fn: Callable[[int], float], device: str,
             tb_logger: SummaryWriter):
    self._device = torch.device(device)
    self._dtype = torch.float

    self._network = network_builder(state_space, action_space).type(
        self._dtype).to(self._device).train()
    self._target_network = network_builder(state_space, action_space).type(
        self._dtype).to(self._device)
    self._target_network.load_state_dict(self._network.state_dict())
    self._target_network.eval()

    self._optimizer = optim.Adam(self._network.parameters())

    self._env = env
    self._state_space = state_space
    self._action_space = action_space
    self._brain_name = self._env.brain_names[0]
    self._memory = ReplayMemory(memory_capacity)
    self._metrics = Metrics()
    self._use_double_dqn = use_double_dqn
    self._gamma = gamma
    self._batch_size = batch_size
    self._target_update = target_update
    self._use_soft_update = use_soft_updates
    self._policy_update = policy_update
    self._min_samples_in_memory = min_samples_in_memory
    self._eps = eps_fn
    self._tb = tb_logger
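# Example epsilon schedule to pass as eps_fn (an assumption; any
# Callable[[int], float] works): exponential decay from 1.0 towards 0.01.
import math

def eps_fn(step: int, eps_start: float = 1.0, eps_end: float = 0.01,
           decay: float = 10_000) -> float:
    return eps_end + (eps_start - eps_end) * math.exp(-step / decay)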
def train(loader, num_classes, device, net, optimizer, criterion):
    num_samples = 0
    running_loss = 0

    metrics = Metrics(range(num_classes))

    net.train()
    for images1, images2, masks in tqdm.tqdm(loader):
        images1 = images1.to(device)
        images2 = images2.to(device)
        masks = masks.to(device)

        assert images1.size()[2:] == images2.size()[2:] == masks.size()[2:], \
            "resolutions for images and masks must be in sync"

        num_samples += int(images1.size(0))

        optimizer.zero_grad()
        outputs = net(images1, images2)

        assert outputs.size()[2:] == masks.size()[2:], \
            "resolutions for predictions and masks must be in sync"
        assert outputs.size()[1] == num_classes, \
            "classes for predictions and dataset must be in sync"

        loss = criterion(outputs, masks.float())  # BCELoss expects float targets
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        for mask, output in zip(masks, outputs):
            prediction = output.detach()
            metrics.add(mask, prediction)

    assert num_samples > 0, "dataset contains training images and labels"

    return {
        "loss": running_loss / num_samples,
        "precision": metrics.get_precision(),
        "recall": metrics.get_recall(),
        "f_score": metrics.get_f_score(),
        "oa": metrics.get_oa(),
    }
def test_network(cfg, network, data_loader, checkpoint, result_set):
    _checkpoint = torch.load(checkpoint)
    # Strip the 'module.' prefix that nn.DataParallel adds to parameter names
    _checkpoint = {k.replace('module.', ''): v
                   for k, v in _checkpoint['rmnet'].items()}
    network.load_state_dict(_checkpoint)
    network.eval()

    checkpoint = os.path.basename(checkpoint)
    test_metrics = AverageMeter(Metrics.names())
    device, = list(set(p.device for p in network.parameters()))
    for idx, (video_name, n_objects, frames, masks, optical_flows) in enumerate(
            tqdm(data_loader, leave=False,
                 desc='%s on GPU %d' % (checkpoint, device.index),
                 position=device.index)):
        with torch.no_grad():
            try:
                est_probs = network(frames, masks, optical_flows, n_objects,
                                    cfg.TEST.MEMORIZE_EVERY, device)
                est_probs = est_probs.permute(0, 2, 1, 3, 4)
                masks = torch.argmax(masks, dim=2)
                est_masks = torch.argmax(est_probs, dim=1)
            except Exception as ex:
                logging.warning('Error occurred during testing Checkpoint[Name=%s]: %s'
                                % (checkpoint, ex))
                continue

        metrics = Metrics.get(est_masks[0], masks[0])
        test_metrics.update(metrics, torch.max(n_objects[0]).item())

    jf_mean = test_metrics.avg(2)
    if jf_mean != 0:
        logging.info('Checkpoint[Name=%s] has been tested successfully, JF-Mean = %.4f.'
                     % (checkpoint, jf_mean))
    else:
        logging.warning('Exception occurred during testing Checkpoint[Name=%s]'
                        % checkpoint)

    result_set['JF-Mean'] = jf_mean
class MetricsWrapper(gym.Wrapper):
    def __init__(self, env=None):
        super(MetricsWrapper, self).__init__(env)
        self.metrics = Metrics()
        self.count = 0
        self.total_reward = 0

    def step(self, action):
        observation, reward, done, info = self.env.step(action)
        self.count = self.count + 1

        pos = self.env.cur_pos  # (x, y, z) world position; y points up
        angle = self.env.cur_angle
        speed = action[0]
        steering = action[1]

        lane_pose = self.env.get_lane_pos2(pos, angle)
        center_dist = lane_pose.dist
        center_angle = lane_pose.angle_rad

        self.total_reward = self.total_reward + reward
        self.metrics.record(self.count, pos[0], pos[2], angle, speed, steering,
                            center_dist, center_angle, reward, self.total_reward)
        return observation, reward, done, info
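# Hypothetical usage (assumes gym-duckietown is installed and this env id is
# registered in your install): wrap the simulator so every step is recorded.
import gym
import gym_duckietown  # noqa: F401  (importing registers the Duckietown envs)

env = MetricsWrapper(gym.make("Duckietown-udem1-v0"))
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())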
def run_training_predictors(data_input_path):
    '''Train predictor models from a prepared data file.'''

    config = sup.load_config(data_input_path)
    metrics = Metrics(config)

    pipeline_class_name = config.get('Training', 'pipeline_class', fallback=None)
    # Check the lookup result before instantiating; calling a None returned
    # by locate() would raise a TypeError before the original check ran.
    PipelineClass = locate('models.' + pipeline_class_name + '.ModelParam')
    if PipelineClass is None:
        raise Exception("Model pipeline could not be found: {}".format(
            'models.' + pipeline_class_name + '.ModelParam'))
    model_param = PipelineClass()

    X_train, y_train, X_val, y_val, y_classes, selected_features, \
        feature_dict, paths, scorers, refit_scorer_name = \
        exe.load_training_input_input(config)

    scorer = scorers[refit_scorer_name]
    results_directory = paths['results_directory']
    save_fig_prefix = results_directory + '/model_images'

    # Baseline test
    baseline_results = exe.execute_baseline_classifier(X_train, y_train,
                                                       X_val, y_val,
                                                       y_classes, scorer)
    print("Baseline results=", baseline_results)

    # Set classifier and estimate performance
    model_clf = model_param.create_pipeline()['model']
    log.info("{} selected.".format(model_clf))

    run_training_estimation(X_train, y_train, X_val, y_val, scorer, model_clf,
                            save_fig_prefix)
def on_epoch_end(self, epoch: int, logs: dict = {}) -> None:
    """Saves the model and logs the metrics at the end of each epoch.

    Parameters
    ----------
    epoch: int
        Epoch number.
    logs: dict, optional
        Dictionary containing the metrics. (default: {})
    """
    test_len = len(self.test_data)
    score = []
    gt = []
    for i in range(test_len):
        X, y = self.test_data[i][0], self.test_data[i][1]
        temp_score = self.model.predict(X)
        score.append(temp_score)
        gt.append(y)

    score = np.concatenate(score, axis=0)
    gt = np.concatenate(gt, axis=0)
    roc_auc = roc_auc_score(gt, score, average="macro")
    _, accuracy = Metrics(gt, score)

    temp_path = f"{self.name}_weights.h5"
    path = os.path.join(self.savepath, temp_path)
    if epoch > 5 and self.best_score < roc_auc:
        self.best_score = roc_auc
        self.model.save_weights(path)

    if self.loggr:
        self.loggr.log({"train_loss": logs["loss"], "epoch": epoch})
        self.loggr.log({"train_keras_auroc": logs.get(self.monitor), "epoch": epoch})
        self.loggr.log({"test_loss": logs["val_loss"], "epoch": epoch})
        self.loggr.log({"test_keras_auroc": logs["val_auc"], "epoch": epoch})
        self.loggr.log({"test_roc_score": roc_auc, "epoch": epoch})
        self.loggr.log({"test_accuracy_score": accuracy, "epoch": epoch})

    logs["val_roc_auc"] = roc_auc
    logs["val_accuracy_score"] = accuracy
def test(args):
    # Get hardware device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Load dataset based on usage specified in the "test_split" argument,
    # either PrivateTest or PublicTest
    test_dataset = FER2013Dataset(args.data_path, args.test_split)

    # Use a large batch size and shuffle=False since order does not matter
    # while testing
    test_loader = DataLoader(test_dataset, batch_size=2048, shuffle=False,
                             pin_memory=True)

    # Load model definition from the model config; do not initialize weights
    # for test since they will be loaded from the checkpoint
    model = Model(args.model_config, initialize_weights=False)

    # Load model state and weights from the checkpoint. convertModel converts
    # any DataParallel model to the current device.
    model = convertModel(args.load_model, model).to(device)

    # Model in evaluation mode
    model.eval()

    # Initialize metric logger object
    metrics = Metrics()

    # Disable gradient tracking
    with torch.no_grad():
        # Iterate over each batch in the test loader
        for idx, batch in enumerate(test_loader):
            # Move the batch to the device, needed explicitly if a GPU is present
            image, target = batch["image"].to(device), batch["emotion"].to(device)

            # Forward pass
            out = model(image)

            # Metrics and sample predictions
            metrics.update_test({"predicted": out, "ground_truth": target})

    # Get confusion matrix plot
    metrics.confusion_matrix_plot(test_dataset.get_class_mapping(), args.test_split)

    # Save other statistics to the csv report; .split("/") (not .format)
    # extracts the checkpoint filename from its path
    test_report = metrics.get_report(mode="test")
    test_report.to_csv("results/{}_testreport.csv".format(
        args.load_model.split("/")[-1].split(".")[0]))
def validate(loader, num_classes, device, net, scheduler, criterion):
    num_samples = 0
    running_loss = 0

    metrics = Metrics(range(num_classes))

    net.eval()
    with torch.no_grad():  # no gradients needed during validation
        for images, masks in tqdm.tqdm(loader):
            images = torch.squeeze(images.to(device, dtype=torch.float))
            masks = torch.squeeze(masks.to(device).long())

            assert images.size()[2:] == masks.size()[1:], \
                "resolutions for images and masks must be in sync"

            num_samples += int(images.size(0))

            # The network returns eight outputs (four side outputs plus four
            # refined ones); all of them contribute to the loss.
            outputs0, outputs1, outputs2, outputs3, \
                outputs0_2, outputs1_2, outputs2_2, outputs3_2 = net(images)

            loss0 = criterion(outputs0, masks)
            loss1 = criterion(outputs1, masks)
            loss2 = criterion(outputs2, masks)
            loss3 = criterion(outputs3, masks)
            loss0_2 = criterion(outputs0_2, masks)
            loss1_2 = criterion(outputs1_2, masks)
            loss2_2 = criterion(outputs2_2, masks)
            loss3_2 = criterion(outputs3_2, masks)
            loss = loss0 + loss1 + loss2 + loss3 + loss0_2 + loss1_2 + loss2_2 + loss3_2
            running_loss += loss.item()

            # Average the four refined outputs for the final prediction
            outputs = (outputs0_2 + outputs1_2 + outputs2_2 + outputs3_2) / 4
            for mask, output in zip(masks, outputs):
                metrics.add(mask, output)

    assert num_samples > 0, "dataset contains validation images and labels"

    scheduler.step(metrics.get_miou())  # update learning rate

    return {
        "loss": running_loss / num_samples,
        "miou": metrics.get_miou(),
        "fg_iou": metrics.get_fg_iou(),
        "mcc": metrics.get_mcc(),
    }
def test(
    model: nn.Module,
    path: str = "data/ptb",
    batch_size: int = 32,
    name: str = "imle_net",
) -> None:
    """Data preprocessing and testing of the model.

    Parameters
    ----------
    model: nn.Module
        Model to be tested.
    path: str, optional
        Path to the directory containing the data. (default: 'data/ptb')
    batch_size: int, optional
        Batch size. (default: 32)
    name: str, optional
        Name of the model. (default: 'imle_net')
    """
    _, _, X_test_scale, y_test, _, _ = preprocess(path=path)
    test_gen = DataGen(X_test_scale, y_test, batch_size=batch_size)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    pred = epoch_run(model, test_gen, device, name)

    roc_score = roc_auc_score(y_test, pred, average="macro")
    acc, mean_acc = Metrics(y_test, pred)
    class_auc = AUC(y_test, pred)
    summary = metric_summary(y_test, pred)

    print(f"class wise accuracy: {acc}")
    print(f"accuracy: {mean_acc}")
    print(f"roc_score : {roc_score}")
    print(f"class wise AUC : {class_auc}")
    print(f"class wise precision, recall, f1 score : {summary}")

    logs = dict()
    logs["roc_score"] = roc_score
    logs["mean_acc"] = mean_acc
    logs["accuracy"] = acc
    logs["class_auc"] = class_auc
    logs["class_precision_recall_f1"] = summary
    logs_path = os.path.join(os.getcwd(), "logs", f"{name}_logs.json")
    with open(logs_path, "w") as f:  # close the file handle after writing
        json.dump(logs, f)
def train(loader, num_classes, device, net, optimizer, criterion):
    num_samples = 0
    running_loss = 0

    metrics = Metrics(range(num_classes))

    net.train()
    for images, masks in tqdm.tqdm(loader):
        images = torch.squeeze(images.to(device, dtype=torch.float))
        masks = torch.squeeze(masks.to(device))

        assert images.size()[2:] == masks.size()[1:], \
            "resolutions for images and masks must be in sync"

        num_samples += int(images.size(0))

        optimizer.zero_grad()
        outputs0, outputs1, outputs2, outputs3, \
            outputs0_2, outputs1_2, outputs2_2, outputs3_2 = net(images)

        loss0 = criterion(outputs0, masks)
        loss1 = criterion(outputs1, masks)
        loss2 = criterion(outputs2, masks)
        loss3 = criterion(outputs3, masks)
        loss0_2 = criterion(outputs0_2, masks)
        loss1_2 = criterion(outputs1_2, masks)
        loss2_2 = criterion(outputs2_2, masks)
        loss3_2 = criterion(outputs3_2, masks)
        loss = loss0 + loss1 + loss2 + loss3 + loss0_2 + loss1_2 + loss2_2 + loss3_2

        loss.backward()
        batch_loss = loss.item()
        optimizer.step()
        running_loss += batch_loss

        # Average the four refined outputs for the reported prediction
        outputs = (outputs0_2 + outputs1_2 + outputs2_2 + outputs3_2) / 4
        for mask, output in zip(masks, outputs):
            prediction = output.detach()
            metrics.add(mask, prediction)

    assert num_samples > 0, "dataset contains training images and labels"

    return {
        "loss": running_loss / num_samples,
        "miou": metrics.get_miou(),
        "fg_iou": metrics.get_fg_iou(),
        "mcc": metrics.get_mcc(),
    }
def load_training_input_input(conf):
    '''
    Load prepared training data from pickle files.

    :param conf: configuration with the paths of the prepared data
    :return:
        X_train: Training data
        y_train: Training labels as numbers
        X_val: Validation data
        y_val: Validation labels as numbers
        y_classes: Class names assigned to numbers
        selected_features: Selected feature columns
        feature_dict: Dictionary of feature columns
        paths: Project paths
        scorers: Scorers for the evaluation
        refit_scorer_name: Name of the refit scorer, default f1
    '''
    # Load input
    X_train_path = os.path.join(conf['Training'].get('features_train_in'))
    y_train_path = os.path.join(conf['Training'].get('outcomes_train_in'))
    X_val_path = os.path.join(conf['Training'].get('features_val_in'))
    y_val_path = os.path.join(conf['Training'].get('outcomes_val_in'))
    labels_path = os.path.join(conf['Training'].get('labels_in'))

    X_train, _, y_train = load_data(X_train_path, y_train_path)
    X_val, _, y_val = load_data(X_val_path, y_val_path)
    labels = load_labels(labels_path)
    y_classes = labels  # the label map

    print("Load feature columns")
    feature_columns_path = os.path.join(
        conf['Training'].get('selected_feature_columns_in'))
    selected_features, feature_dict, df_feature_columns = load_feature_columns(
        feature_columns_path, X_train)

    print("Load metrics")
    metrics = Metrics(conf)
    scorers = metrics.scorers
    refit_scorer_name = metrics.refit_scorer_name

    print("Load paths")
    paths = Paths(conf).paths

    return X_train, y_train, X_val, y_val, y_classes, selected_features, \
        feature_dict, paths, scorers, refit_scorer_name
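# Illustrative [Training] section of the configparser file consumed above
# (keys taken from the lookups in this function and run_training_predictors;
# the pipeline name and paths are made up):
#
#   [Training]
#   pipeline_class = svm_pipeline
#   features_train_in = prepared/features_train.pickle
#   outcomes_train_in = prepared/outcomes_train.pickle
#   features_val_in = prepared/features_val.pickle
#   outcomes_val_in = prepared/outcomes_val.pickle
#   labels_in = prepared/labels.pickle
#   selected_feature_columns_in = prepared/selected_feature_columns.csv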