def main(): """ Starting point of the application """ hvd.init() params = parse_args(PARSER.parse_args()) set_flags(params) model_dir = prepare_model_dir(params) params.model_dir = model_dir logger = get_logger(params) model = Unet() dataset = Dataset(data_dir=params.data_dir, batch_size=params.batch_size, fold=params.fold, augment=params.augment, gpu_id=hvd.rank(), num_gpus=hvd.size(), seed=params.seed) if 'train' in params.exec_mode: train(params, model, dataset, logger) if 'evaluate' in params.exec_mode: if hvd.rank() == 0: evaluate(params, model, dataset, logger) if 'predict' in params.exec_mode: if hvd.rank() == 0: predict(params, model, dataset, logger)
def __init__(self, kwargs):
    super(DepthRegressorTrainer, self).__init__()
    self.hparams = kwargs
    if self.hparams.resize_input:
        self.unet = Unet(channels_in=3, channels_out=1)
    else:
        self.unet = UNetMini(channels_in=3, channels_out=1)
    self.dataset = lambda split: ScenesDataset(
        split, self.hparams.datasetdir, self.hparams.splitsdir, kwargs)
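
# The trainer stores `self.dataset` as a factory keyed by split name. Assuming
# a PyTorch Lightning-style trainer (an assumption; the base class is not
# shown), the factory would typically be consumed in a dataloader hook like
# this sketch, where batch_size/num_workers hparams are also assumed:
from torch.utils.data import DataLoader

def train_dataloader(self):
    return DataLoader(self.dataset('train'),
                      batch_size=self.hparams.batch_size,
                      shuffle=True,
                      num_workers=self.hparams.num_workers)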
def train_model(self):
    '''Train a U-Net model'''
    from model.unet import Unet

    # Prepare the U-Net model
    _unet = Unet(self.img_height, self.img_width, self.img_channels)
    inputs, outputs = _unet.model()
    model = Model(inputs=[inputs], outputs=[outputs])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=[my_iou_metric])
    print(model.summary())

    # Initialize our callbacks
    model_path = self.model_output_path
    checkpoint = ModelCheckpoint(model_path,
                                 monitor="val_loss",
                                 mode="min",
                                 save_best_only=True,
                                 verbose=1)
    # Optionally pass restore_best_weights=True to EarlyStopping
    earlystop = EarlyStopping(monitor='val_loss',
                              min_delta=0,
                              patience=5,
                              verbose=1)

    # Fit the model
    results = model.fit(X_train, Y_train,
                        validation_split=0.1,
                        batch_size=16,
                        epochs=10,
                        callbacks=[earlystop, checkpoint])
    return model_path
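
# `my_iou_metric` is referenced above but not defined in this snippet. A common
# Keras-compatible IoU metric for binary masks looks like the sketch below
# (an assumption, not necessarily the author's implementation):
import tensorflow.keras.backend as K

def my_iou_metric(y_true, y_pred, threshold=0.5, smooth=1e-6):
    # Threshold probabilities into a hard mask, then compute IoU per sample
    y_pred = K.cast(y_pred > threshold, 'float32')
    intersection = K.sum(y_true * y_pred, axis=[1, 2, 3])
    union = K.sum(y_true, axis=[1, 2, 3]) + K.sum(y_pred, axis=[1, 2, 3]) - intersection
    return K.mean((intersection + smooth) / (union + smooth))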
def main(): """ Starting point of the application """ params = parse_args(description="UNet-medical") if params.use_horovod: hvd_init() set_flags(params) model_dir = prepare_model_dir(params) params.model_dir = model_dir logger = get_logger(params) tb_logger = None if params.tensorboard_logging: log_dir = params.log_dir if horovod_enabled() and params.log_all_workers: log_dir = os.path.join(log_dir, f'worker_{hvd_rank()}') tb_logger = namedtuple('TBSummaryWriters', 'train_writer eval_writer')( tf.summary.create_file_writer(log_dir), tf.summary.create_file_writer(os.path.join(log_dir, 'eval'))) model = Unet() dataset = Dataset(data_dir=params.data_dir, batch_size=params.batch_size, fold=params.fold, augment=params.augment, hpu_id=hvd_rank() if horovod_enabled() else 0, num_hpus=hvd_size() if horovod_enabled() else 1, seed=params.seed) if 'train' in params.exec_mode: with dump_callback(params.dump_config): train(params, model, dataset, logger, tb_logger) if 'evaluate' in params.exec_mode: evaluate(params, model, dataset, logger, tb_logger) if 'predict' in params.exec_mode: predict(params, model, dataset, logger)
def __init__(self, kwargs):
    super(SceneNetTrainer, self).__init__()
    self.hparams = kwargs
    self.ifnet = IFNet()
    self.kernel_size = self.hparams.kernel_size
    self.dims = torch.tensor([139, 104, 112], device=self.device)
    self.dims = (self.dims / self.hparams.scale_factor).round().long()
    self.project = project(self.dims, self.kernel_size,
                           torch.tensor(self.hparams.sigma))
    if self.hparams.resize_input:
        self.unet = Unet(channels_in=3, channels_out=1)
    else:
        self.unet = UNetMini(channels_in=3, channels_out=1)
    if self.hparams.skip_unet:
        self.unet = None
    self.dataset = lambda split: scene_net_data(
        split, self.hparams.datasetdir, self.hparams.num_points,
        self.hparams.splitsdir, self.hparams)
import os
import datetime

import tensorflow as tf

from util.load_cfg import train_cfg, dataset_cfg, sample_cfg
from util.dice import *
from model.unet import Unet
from data.train import dataloader

checkpoint_dir = train_cfg["checkpoint_dir"]
log_dir = train_cfg["log_dir"]
if not os.path.exists(checkpoint_dir):
    os.mkdir(checkpoint_dir)
if not os.path.exists(log_dir):
    os.mkdir(log_dir)

model = Unet(sample_cfg["patch_size"])

# Learning rate schedule and optimizer
cosine_decay = tf.keras.experimental.CosineDecayRestarts(
    initial_learning_rate=train_cfg["init_lr"],
    first_decay_steps=12000,
    t_mul=1000,
    m_mul=0.5,
    alpha=1e-5)
optimizer = tf.keras.optimizers.Adam(learning_rate=cosine_decay)

# Loss function
# loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)

# Metric recorders
train_loss = tf.keras.metrics.Mean(name='train_loss')
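
# With a schedule-wrapped optimizer and a Mean metric, training typically uses
# a custom step rather than model.fit. A minimal sketch, assuming a
# `dice_loss(y_true, y_pred)` exported by util.dice (the name is an assumption):
@tf.function
def train_step(images, masks):
    with tf.GradientTape() as tape:
        preds = model(images, training=True)
        loss = dice_loss(masks, preds)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    train_loss(loss)  # accumulate the running mean for logging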
if __name__ == '__main__':
    images, labels = create_inputs_seg_hand(is_train=True)
    session_config = tf.ConfigProto(
        device_count={'GPU': 0},
        gpu_options={
            'allow_growth': 1,
            # 'per_process_gpu_memory_fraction': 0.1,
            'visible_device_list': '0'
        },
        allow_soft_placement=True)  # Required: without it a cuDNN version-mismatch error is raised no matter what -- a very subtle bug.

    with tf.Session(config=session_config) as sess:
        # 1. Define the model first, so the initialization in step 2 can run
        model = Unet(sess, cfg, is_train=True, size=(128, 128), l2_reg=0.0001)

        # 2. Initialize variables and start the queue-runner threads
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        if model_restore_name:
            model.restore(model_restore_name)

        # 3. Train the model
        # num_epochs = 10000
        for i in range(start_epoch, num_epochs):
            since = time.time()
            # Fetch one batch of images and masks
            pics, pics_masks = sess.run([images, labels])
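
# The training loop in the previous snippet is truncated. Under the TF1
# coordinator/queue-runner pattern used above (and in the test script below),
# scripts normally end by stopping the runners; the standard shutdown is:
#
#     coord.request_stop()
#     coord.join(threads)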
os.makedirs(predict_pics_save, exist_ok=True)

if __name__ == '__main__':
    images, labels = create_inputs_seg_hand(is_train=is_train)
    session_config = tf.ConfigProto(
        device_count={'GPU': 0},
        gpu_options={
            'allow_growth': 1,
            # 'per_process_gpu_memory_fraction': 0.1,
            'visible_device_list': '0'
        },
        allow_soft_placement=True)  # Required: without it a cuDNN version-mismatch error is raised no matter what -- a very subtle bug.

    with tf.Session(config=session_config) as sess:
        # 1. Define the model first, so the initialization in step 2 can run
        model = Unet(sess, cfg, is_train=is_train)

        # 2. Initialize variables and start the queue-runner threads
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        model.restore(model_restore_name)

        # 3. Run the test images
        index = 0
        for i in range(test_data_number // batch_size):
            # Fetch one batch of images and masks
            pics, pics_masks = sess.run([images, labels])
            pics = pics / 255
            # Measure elapsed time
            since = time.time()
if __name__ == "__main__": img_size = 256 batch_size = 32 train_path = './data/stage1_train/' test_path = './data/stage1_test/' X_train, Y_train, X_test, sizes_test = make_df(train_path, test_path, img_size) xtr, xval, ytr, yval = train_test_split(X_train, Y_train, test_size=0.1, random_state=7) train_generator, val_generator = generator(xtr, xval, ytr, yval, batch_size) model = Unet(img_size) model.compile( optimizer='adam', loss=bce_dice_loss, metrics=[bce_dice_loss, recall_score, precision_score, rocauc_score]) ckpt = ModelCheckpoint('.model.hdf5', save_best_only=True, monitor='val_rocauc_score', mode='max') model.fit_generator(train_generator, steps_per_epoch=len(xtr) / 6, epochs=50, validation_data=val_generator, validation_steps=len(xval) / batch_size, callbacks=[ckpt])
def main(): """ Starting point of the application """ flags = PARSER.parse_args() params = _cmd_params(flags) backends = [StdOutBackend(Verbosity.VERBOSE)] if params.log_dir is not None: backends.append(JSONStreamBackend(Verbosity.VERBOSE, params.log_dir)) logger = Logger(backends) # Optimization flags os.environ['CUDA_CACHE_DISABLE'] = '0' os.environ['HOROVOD_GPU_ALLREDUCE'] = 'NCCL' os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private' os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = 'data' os.environ['TF_ADJUST_HUE_FUSED'] = 'data' os.environ['TF_ADJUST_SATURATION_FUSED'] = 'data' os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = 'data' os.environ['TF_SYNC_ON_FINISH'] = '0' os.environ['TF_AUTOTUNE_THRESHOLD'] = '2' hvd.init() if params.use_xla: tf.config.optimizer.set_jit(True) gpus = tf.config.experimental.list_physical_devices('GPU') for gpu in gpus: tf.config.experimental.set_memory_growth(gpu, True) if gpus: tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU') if params.use_amp: tf.keras.mixed_precision.experimental.set_policy('mixed_float16') else: os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '0' # Build the model model = Unet() dataset = Dataset(data_dir=params.data_dir, batch_size=params.batch_size, fold=params.crossvalidation_idx, augment=params.augment, gpu_id=hvd.rank(), num_gpus=hvd.size(), seed=params.seed) if 'train' in params.exec_mode: train(params, model, dataset, logger) if 'evaluate' in params.exec_mode: if hvd.rank() == 0: model = restore_checkpoint(model, params.model_dir) evaluate(params, model, dataset, logger) if 'predict' in params.exec_mode: if hvd.rank() == 0: model = restore_checkpoint(model, params.model_dir) predict(params, model, dataset, logger)
from util import trainer
from data.train_data_provider import ImageDataProvider
from model.unet import Unet

output_path = "/data/Cell/unet/model3/"
data_provider = ImageDataProvider("/data/Cell/unet/*.jpg")

net = Unet(layers=3, features_root=32, channels=3, n_class=2)
trainer = trainer.Trainer(net, optimizer="adam")
path = trainer.train(data_provider, output_path, training_iters=32, epochs=100)
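
# The constructor signature (layers, features_root, channels, n_class) matches
# a tf_unet-style API. Assuming that API (an assumption, not confirmed by this
# snippet), inference with the trained weights would look like:
x_test, _ = data_provider(1)             # draw one image from the provider
prediction = net.predict(path, x_test)   # path returned by trainer.train above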
def main(args): os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu if 'deeplab' in args.model_name: if 'resnet101' in args.model_name: net = Deeplabv3plus(nInputChannels=3, n_classes=args.num_classes, os=args.output_stride, backbone_type='resnet101') elif 'resnet50' in args.model_name: net = Deeplabv3plus(nInputChannels=3, n_classes=args.num_classes, os=args.output_stride, backbone_type='resnet50') elif 'resnet34' in args.model_name: net = Deeplabv3plus(nInputChannels=3, n_classes=args.num_classes, os=args.output_stride, backbone_type='resnet34') elif 'unet' in args.model_name: net = Unet(in_ch=3, out_ch=1) elif 'trfe' in args.model_name: if args.model_name == 'trfe': net = TRFENet(in_ch=3, out_ch=1) elif args.model_name == 'trfe1': net = TRFENet1(in_ch=3, out_ch=1) elif args.model_name == 'trfe2': net = TRFENet2(in_ch=3, out_ch=1) elif 'mtnet' in args.model_name: net = MTNet(in_ch=3, out_ch=1) elif 'segnet' in args.model_name: net = SegNet(input_channels=3, output_channels=1) elif 'fcn' in args.model_name: net = FCN8s(1) else: raise NotImplementedError net.load_state_dict(torch.load(args.load_path)) net.cuda() composed_transforms_ts = transforms.Compose([ trforms.FixedResize(size=(args.input_size, args.input_size)), trforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), trforms.ToTensor()]) if args.test_dataset == 'TN3K': test_data = tn3k.TN3K(mode='test', transform=composed_transforms_ts, return_size=True) save_dir = args.save_dir + args.test_fold + '-' + args.test_dataset + os.sep + args.model_name + os.sep testloader = DataLoader(test_data, batch_size=1, shuffle=False, num_workers=0) num_iter_ts = len(testloader) if not os.path.exists(save_dir): os.makedirs(save_dir) net.cuda() net.eval() start_time = time.time() with torch.no_grad(): total_iou = 0 for sample_batched in tqdm(testloader): inputs, labels, label_name, size = sample_batched['image'], sample_batched['label'], sample_batched[ 'label_name'], sample_batched['size'] inputs = Variable(inputs, requires_grad=False) labels = Variable(labels) labels = labels.cuda() inputs = inputs.cuda() if 'trfe' in args.model_name or 'mtnet' in args.model_name: outputs, _ = net.forward(inputs) else: outputs = net.forward(inputs) prob_pred = torch.sigmoid(outputs) iou = utils.get_iou(prob_pred, labels) total_iou += iou shape = (size[0, 0], size[0, 1]) prob_pred = F.interpolate(prob_pred, size=shape, mode='bilinear', align_corners=True).cpu().data save_data = prob_pred[0] save_png = save_data[0].numpy() save_png = np.round(save_png) save_png = save_png * 255 save_png = save_png.astype(np.uint8) save_path = save_dir + label_name[0] if not os.path.exists(save_path[:save_path.rfind('/')]): os.makedirs(save_path[:save_path.rfind('/')]) cv2.imwrite(save_dir + label_name[0], save_png) print(args.model_name + ' iou:' + str(total_iou / len(testloader))) duration = time.time() - start_time print("-- %s contain %d images, cost time: %.4f s, speed: %.4f s." % ( args.test_dataset, num_iter_ts, duration, duration / num_iter_ts)) print("------------------------------------------------------------------")
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    save_dir_root = os.path.join(os.path.dirname(os.path.abspath(__file__)))
    runs = sorted(glob.glob(os.path.join(save_dir_root, 'run', 'run_*')))
    if args.resume_epoch != 0:
        run_id = int(runs[-1].split('_')[-1]) if runs else 0
    else:
        run_id = int(runs[-1].split('_')[-1]) + 1 if runs else 0
    if args.run_id >= 0:
        run_id = args.run_id

    save_dir = os.path.join(save_dir_root, 'run', 'run_' + str(run_id))
    log_dir = os.path.join(
        save_dir,
        datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)

    batch_size = args.batch_size
    if 'deeplab' in args.model_name:
        if 'resnet101' in args.model_name:
            net = Deeplabv3plus(nInputChannels=3, n_classes=args.num_classes,
                                os=args.output_stride, backbone_type='resnet101')
        elif 'resnet50' in args.model_name:
            net = Deeplabv3plus(nInputChannels=3, n_classes=args.num_classes,
                                os=args.output_stride, backbone_type='resnet50')
        elif 'resnet34' in args.model_name:
            net = Deeplabv3plus(nInputChannels=3, n_classes=args.num_classes,
                                os=args.output_stride, backbone_type='resnet34')
        else:
            raise NotImplementedError
    elif 'unet' in args.model_name:
        net = Unet(in_ch=3, out_ch=1)
    elif 'trfe' in args.model_name:
        if args.model_name == 'trfe1':
            net = TRFENet1(in_ch=3, out_ch=1)
        elif args.model_name == 'trfe2':
            net = TRFENet2(in_ch=3, out_ch=1)
        elif args.model_name == 'trfe':
            net = TRFENet(in_ch=3, out_ch=1)
        batch_size = 4
    elif 'mtnet' in args.model_name:
        net = MTNet(in_ch=3, out_ch=1)
        batch_size = 4
    elif 'segnet' in args.model_name:
        net = SegNet(input_channels=3, output_channels=1)
    elif 'fcn' in args.model_name:
        net = FCN8s(1)
    else:
        raise NotImplementedError

    if args.resume_epoch == 0:
        print('Training ' + args.model_name + ' from scratch...')
    else:
        load_path = os.path.join(
            save_dir, args.model_name + '_epoch-' + str(args.resume_epoch) + '.pth')
        print('Initializing weights from: {}...'.format(load_path))
        net.load_state_dict(torch.load(load_path))

    if args.pretrain == 'THYROID':
        net.load_state_dict(
            torch.load('./pre_train/thyroid-pretrain.pth',
                       map_location=lambda storage, loc: storage))
        print('Loading pretrained model...')

    torch.cuda.set_device(device=0)
    net.cuda()

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum)
    if args.criterion == 'Dice':
        criterion = soft_dice
    else:
        raise NotImplementedError

    composed_transforms_tr = transforms.Compose([
        trforms.FixedResize(size=(args.input_size, args.input_size)),
        trforms.RandomHorizontalFlip(),
        trforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        trforms.ToTensor()])

    composed_transforms_ts = transforms.Compose([
        trforms.FixedResize(size=(args.input_size, args.input_size)),
        trforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        trforms.ToTensor()])

    if args.dataset == 'TN3K':
        train_data = tn3k.TN3K(mode='train', transform=composed_transforms_tr, fold=args.fold)
        val_data = tn3k.TN3K(mode='val', transform=composed_transforms_ts, fold=args.fold)
    elif args.dataset == 'TG3K':
        train_data = tg3k.TG3K(mode='train', transform=composed_transforms_tr)
        val_data = tg3k.TG3K(mode='val', transform=composed_transforms_ts)
    elif args.dataset == 'TATN':
        train_data = tatn.TATN(mode='train', transform=composed_transforms_tr, fold=args.fold)
        val_data = tatn.TATN(mode='val', transform=composed_transforms_ts, fold=args.fold)

    trainloader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=0)
    testloader = DataLoader(val_data, batch_size=1, shuffle=False, num_workers=0)
    num_iter_tr = len(trainloader)
    num_iter_ts = len(testloader)
    nitrs = args.resume_epoch * num_iter_tr
    nsamples = args.resume_epoch * len(train_data)
    print('nitrs: %d num_iter_tr: %d' % (nitrs, num_iter_tr))
    print('nsamples: %d tot_num_samples: %d' % (nsamples, len(train_data)))

    aveGrad = 0
    global_step = 0
    recent_losses = []
    start_t = time.time()
    best_f, cur_f = 0.0, 0.0

    for epoch in range(args.resume_epoch, args.nepochs):
        net.train()
        epoch_losses = []
        for ii, sample_batched in enumerate(trainloader):
            if 'trfe' in args.model_name or args.model_name == 'mtnet':
                nodules, glands = sample_batched
                inputs_n, labels_n = nodules['image'].cuda(), nodules['label'].cuda()
                inputs_g, labels_g = glands['image'].cuda(), glands['label'].cuda()
                # Interleave nodule and gland samples within one batch
                inputs = torch.cat([inputs_n[0].unsqueeze(0),
                                    inputs_g[0].unsqueeze(0)], dim=0)
                for i in range(1, inputs_n.size()[0]):
                    inputs = torch.cat([inputs, inputs_n[i].unsqueeze(0)], dim=0)
                    inputs = torch.cat([inputs, inputs_g[i].unsqueeze(0)], dim=0)
                global_step += inputs.data.shape[0]
                nodule, thyroid = net.forward(inputs)
                loss = 0
                for i in range(inputs.size()[0]):
                    if i % 2 == 0:
                        loss += criterion(nodule[i], labels_n[int(i / 2)],
                                          size_average=False, batch_average=True)
                    else:
                        loss += 0.5 * criterion(thyroid[i], labels_g[int((i - 1) / 2)],
                                                size_average=False, batch_average=True)
            else:
                inputs, labels = sample_batched['image'].cuda(), sample_batched['label'].cuda()
                global_step += inputs.data.shape[0]
                outputs = net.forward(inputs)
                loss = criterion(outputs, labels, size_average=False, batch_average=True)

            trainloss = loss.item()
            epoch_losses.append(trainloss)
            if len(recent_losses) < args.log_every:
                recent_losses.append(trainloss)
            else:
                recent_losses[nitrs % len(recent_losses)] = trainloss

            # Backward the averaged gradient
            loss.backward()
            aveGrad += 1
            nitrs += 1
            nsamples += args.batch_size

            # Update the weights once every `naver_grad` forward passes
            if aveGrad % args.naver_grad == 0:
                optimizer.step()
                optimizer.zero_grad()
                aveGrad = 0

            if nitrs % args.log_every == 0:
                meanloss = sum(recent_losses) / len(recent_losses)
                print('epoch: %d ii: %d trainloss: %.2f timecost: %.2f secs' %
                      (epoch, ii, meanloss, time.time() - start_t))
                writer.add_scalar('data/trainloss', meanloss, nsamples)

        meanloss = sum(epoch_losses) / len(epoch_losses)
        print('epoch: %d meanloss: %.2f' % (epoch, meanloss))
        writer.add_scalar('data/epochloss', meanloss, nsamples)

        if args.use_test == 1:
            prec_lists = []
            recall_lists = []
            sum_testloss = 0.0
            total_mae = 0.0
            cnt = 0
            count = 0
            iou = 0
            if args.use_eval == 1:
                net.eval()
            for ii, sample_batched in enumerate(testloader):
                inputs, labels = sample_batched['image'].cuda(), sample_batched['label'].cuda()
                with torch.no_grad():
                    if 'trfe' in args.model_name or args.model_name == 'mtnet':
                        outputs, _ = net.forward(inputs)
                    else:
                        outputs = net.forward(inputs)
                    loss = criterion(outputs, labels, size_average=False, batch_average=True)
                    sum_testloss += loss.item()

                    predictions = torch.sigmoid(outputs)
                    iou += utils.get_iou(predictions, labels)
                    count += 1
                    total_mae += utils.get_mae(predictions, labels) * predictions.size(0)
                    prec_list, recall_list = utils.get_prec_recall(predictions, labels)
                    prec_lists.extend(prec_list)
                    recall_lists.extend(recall_list)
                    cnt += predictions.size(0)

                if ii % num_iter_ts == num_iter_ts - 1:
                    mmae = total_mae / cnt
                    mean_testloss = sum_testloss / num_iter_ts
                    mean_prec = sum(prec_lists) / len(prec_lists)
                    mean_recall = sum(recall_lists) / len(recall_lists)
                    fbeta = 1.3 * mean_prec * mean_recall / (0.3 * mean_prec + mean_recall)
                    iou = iou / count

                    print('Validation:')
                    print('epoch: %d, numImages: %d testloss: %.2f mmae: %.4f fbeta: %.4f iou: %.4f' %
                          (epoch, cnt, mean_testloss, mmae, fbeta, iou))
                    writer.add_scalar('data/validloss', mean_testloss, nsamples)
                    writer.add_scalar('data/validmae', mmae, nsamples)
                    writer.add_scalar('data/validfbeta', fbeta, nsamples)
                    writer.add_scalar('data/validiou', iou, epoch)

                    cur_f = iou
                    if cur_f > best_f:
                        save_path = os.path.join(save_dir, args.model_name + '_best' + '.pth')
                        torch.save(net.state_dict(), save_path)
                        print("Save model at {}\n".format(save_path))
                        best_f = cur_f

        if epoch % args.save_every == args.save_every - 1:
            save_path = os.path.join(
                save_dir, args.model_name + '_epoch-' + str(epoch) + '.pth')
            torch.save(net.state_dict(), save_path)
            print("Save model at {}\n".format(save_path))
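
# `soft_dice` is selected as the criterion above with the call shape
# criterion(outputs, labels, size_average=False, batch_average=True). A sketch
# of a soft Dice loss matching that signature (an assumption, not the original):
import torch

def soft_dice(logits, labels, smooth=1.0, size_average=False, batch_average=True):
    probs = torch.sigmoid(logits)
    dims = tuple(range(1, probs.dim()))  # reduce over all non-batch dims
    intersection = (probs * labels).sum(dim=dims)
    denom = probs.sum(dim=dims) + labels.sum(dim=dims)
    loss = 1.0 - (2.0 * intersection + smooth) / (denom + smooth)
    return loss.mean() if batch_average else loss.sum()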