def build_graph():
    input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='image')
    input_label = tf.placeholder(tf.int32, shape=[None, 1], name='labels')
    input_bbox_targets = tf.placeholder(tf.float32, shape=[None, 4], name='gt_boxes')
    input_bbox_inside_weights = tf.placeholder(tf.float32, shape=[None, 4], name='bbox_inside_weights')
    input_bbox_outside_weights = tf.placeholder(tf.float32, shape=[None, 4], name='bbox_outside_weights')
    learning_rate = tf.placeholder(tf.float32)

    cnn_layers = ResNet()
    rnn_layers = Lstm()

    # CNN backbone followed by a bidirectional recurrent encoding of the feature map
    x = input_image
    x = cnn_layers.build(x, True)
    x = rnn_layers.build(x, 512, 128, 512)

    ########################
    # rpn cls score
    ########################
    y = fc_layer(x, 512, 10 * 2, "fc_rpn_cls")
    dims = tf.shape(y)
    cls_prob = tf.reshape(tf.nn.softmax(tf.reshape(y, [-1, 2])), [dims[0], dims[1], -1, 2])

    #########################
    # rpn bbox pred
    #########################
    box_pred = fc_layer(x, 512, 10 * 4, "fc_rpn_pred")

    loss_layer = Loss()
    output_loss, cls_loss, box_loss = loss_layer.build(
        y, box_pred, input_label, input_bbox_targets,
        input_bbox_inside_weights, input_bbox_outside_weights)

    train_step = tf.train.AdamOptimizer(learning_rate).minimize(output_loss)

    return [train_step, output_loss, learning_rate, input_image, input_label,
            input_bbox_targets, input_bbox_inside_weights, input_bbox_outside_weights,
            cls_prob, box_pred, cls_loss, box_loss]
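# A minimal sketch of how the tensors returned by build_graph() could drive a
# TF1-style training loop. next_batch() is a hypothetical helper standing in
# for whatever data pipeline feeds this graph; everything else is the standard
# tf.Session / feed_dict API.
def train_loop_sketch(num_steps=1000, lr=1e-4):
    (train_step, output_loss, learning_rate, input_image, input_label,
     input_bbox_targets, input_bbox_inside_weights, input_bbox_outside_weights,
     cls_prob, box_pred, cls_loss, box_loss) = build_graph()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(num_steps):
            # hypothetical batch source; shapes must match the placeholders above
            image, label, bbox_targets, in_w, out_w = next_batch()
            _, loss_val = sess.run(
                [train_step, output_loss],
                feed_dict={input_image: image,
                           input_label: label,
                           input_bbox_targets: bbox_targets,
                           input_bbox_inside_weights: in_w,
                           input_bbox_outside_weights: out_w,
                           learning_rate: lr})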
def train_step(model, optimizer, x_train, y_train):
    # record the forward pass so gradients can be taken w.r.t. the model weights
    with tf.GradientTape() as tape:
        y_pred = model(x_train, training=True)
        loss = Loss()(y_train, y_pred)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss
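# Hedged usage example for train_step: a toy Keras model, optimizer, and random
# data, not the repo's actual architecture. Loss() is assumed to be callable
# like a Keras loss, i.e. Loss()(y_true, y_pred) -> scalar tensor.
model = tf.keras.Sequential([tf.keras.layers.Dense(2)])  # placeholder model
optimizer = tf.keras.optimizers.Adam(1e-3)
x = tf.random.normal([8, 4])   # placeholder batch
y = tf.random.normal([8, 2])   # placeholder targets
loss = train_step(model, optimizer, x, y)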
def main():
    args = parse_args()

    # cudnn related settings
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = eval('models.' + config.MODEL.NAME + '.get_nnb')(config)

    writer_dict = {
        'writer': SummaryWriter(log_dir='./output/facexray'),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    gpus = list(config.GPUS)
    model = torch.nn.DataParallel(model)

    # define loss function (criterion) and optimizer
    criterion = Loss()
    optimizer = get_optimizer(config, model)

    last_epoch = config.TRAIN.BEGIN_EPOCH
    if isinstance(config.TRAIN.LR_STEP, list):
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR, last_epoch - 1)
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR, last_epoch - 1)

    # Data loading code
    # list_name is not configured separately in the .yaml file
    # the transform cannot handle other input sizes yet; inputs should be made [256, 256, 3]
    train_dataset = eval('dataset.' + config.DATASET.DATASET + '.' + config.DATASET.DATASET)(
        config.DATASET.ROOT, config.DATASET.TRAIN_SET, None,
        transforms.Compose([transforms.ToTensor()]))
    valid_dataset = eval('dataset.' + config.DATASET.DATASET + '.' + config.DATASET.DATASET)(
        config.DATASET.ROOT, config.DATASET.TEST_SET, None,
        transforms.Compose([transforms.ToTensor()]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU,
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE_PER_GPU,
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY)

    for epoch in range(last_epoch, config.TRAIN.END_EPOCH):
        # the pretrained HRNet layers stay frozen for the early epochs;
        # from epoch 150000 on, all parameters are trained
        if epoch == 150000:
            for k, v in model.named_parameters():
                v.requires_grad = True

        # train for one epoch
        train(config, train_loader, model, criterion, optimizer, epoch, writer_dict)
        # evaluate on validation set
        validate(config, valid_loader, model, criterion, writer_dict)

        # step the scheduler after this epoch's optimizer updates
        lr_scheduler.step()

    torch.save(model.module.state_dict(), './output/BI_dataset/faceXray.pth')
    writer_dict['writer'].close()
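# A minimal sketch of the train() helper invoked above, reconstructed from its
# call site; the signature and the (images, targets) batch layout are
# assumptions. The repo's real version likely logs timing and extra metrics;
# this shows only the standard forward/backward loop plus TensorBoard steps.
def train(config, train_loader, model, criterion, optimizer, epoch, writer_dict):
    model.train()
    for i, (images, targets) in enumerate(train_loader):  # batch layout assumed
        images = images.cuda(non_blocking=True)
        targets = targets.cuda(non_blocking=True)

        outputs = model(images)
        loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        writer = writer_dict['writer']
        global_steps = writer_dict['train_global_steps']
        writer.add_scalar('train_loss', loss.item(), global_steps)
        writer_dict['train_global_steps'] = global_steps + 1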
def main():
    args = parse_args()

    # cudnn related settings
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    nnb = models.nnb.get_nnb(config)  # parameters not frozen; TODO: add parameters to the optimizer mid-training
    # nnb = models.ae.get_ae()
    # nnb = models.fcn.get_fcn(config)
    # make nnc's softmax a no-op during training
    nnc = models.nnc.get_nnc(config)

    writer_dict = {
        'writer': SummaryWriter(log_dir='./output/facexray/tensorboard/tensorboard'
                                + '_' + datetime.now().strftime('%Y%m%d_%H%M%S')),
        'train_global_steps': 0,
        'valid_global_steps': 0,
        'test_global_steps': 0,
    }

    # log init
    save_dir = os.path.join('./output/facexray/log/log' + '_' + datetime.now().strftime('%Y%m%d_%H%M%S'))
    if os.path.exists(save_dir):
        raise NameError('model dir exists!')
    os.makedirs(save_dir)
    logging = init_log(save_dir)
    _print = logging.info

    gpus = list(config.GPUS)
    nnb = torch.nn.DataParallel(nnb, device_ids=[0]).cuda()
    nnc = torch.nn.DataParallel(nnc, device_ids=[0]).cuda()

    # define loss function (criterion) and optimizer
    criterion = Loss()

    # initialize the optimizer; train all parameters except nnb's original HRNet parameters
    optimizer = get_optimizer(config, [nnb, nnc])  # TODO: for now, simply register all parameters

    NNB_GRAD = False
    nnb.module.pretrained_grad(NNB_GRAD)

    last_iter = config.TRAIN.BEGIN_ITER
    best_perf = 0.0
    if isinstance(config.TRAIN.LR_STEP, list):
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR, last_iter - 1)
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR, last_iter - 1)

    # Data loading code
    # the transform cannot handle other input sizes yet; inputs should be made [256, 256, 3]
    # train_dataset = eval('dataset.' + config.DATASET.TRAIN_SET + '.' + config.DATASET.TRAIN_SET)(
    #     root=config.DATASET.TRAIN_ROOT, list_name=config.DATASET.TRAIN_LIST, mode='train', Transform='simple')
    # valid_dataset = eval('dataset.' + config.DATASET.EVAL_SET + '.' + config.DATASET.EVAL_SET)(
    #     root=config.DATASET.VALID_ROOT, list_name=config.DATASET.VALID_LIST, mode='valid', Transform='simple')
    # test_dataset = eval('dataset.' + config.DATASET.EVAL_SET + '.' + config.DATASET.EVAL_SET)(
    #     root=config.DATASET.TEST_ROOT, list_name=config.DATASET.TEST_LIST, mode='test', Transform='simple')
    train_dataset = mydataset(datapath + 'train15k', datapath + 'origin5k')
    valid_dataset = mydataset(datapath + 'generatorBlendedRandomGaussian', datapath + 'origin')
    test_dataset = mydataset(datapath + 'test1k', datapath + 'test_o500')

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU * len(gpus),
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY)

    def cycle(loader):
        # loop over the loader forever; if the dataset can regenerate its
        # blended samples, do so after each full pass
        while True:
            for x in loader:
                yield x
            op = getattr(loader.dataset, "generate", None)
            if callable(op):
                op()

    train_generator = iter(cycle(train_loader))

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE_PER_GPU * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=config.TEST.BATCH_SIZE_PER_GPU * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY)

    for iteration in range(last_iter, config.TRAIN.END_ITER, config.TRAIN.EVAL_ITER):
        # the pretrained HRNet layers stay frozen for the first 50000 iterations;
        # after that, all parameters are trained
        if not NNB_GRAD and iteration >= 50000:
            if len(gpus) > 0:
                nnb.module.pretrained_grad(True)
            else:
                nnb.pretrained_grad(True)
            NNB_GRAD = True

        # train for one evaluation interval
        train(config, train_generator, nnb, nnc, criterion, optimizer, iteration,
              writer_dict, _print, lr_scheduler=lr_scheduler)
        # evaluate on validation set
        perf_indicator = validate(config, valid_loader, nnb, nnc, criterion, writer_dict, _print)
        test(config, test_loader, nnb, nnc, criterion, writer_dict, _print)

        # save the best model so far
        # if perf_indicator > best_perf:
        #     best_perf = perf_indicator
        #     torch.save(model.module.state_dict(), './output/BI_dataset/bestfaceXray_' + str(best_perf) + '.pth')
        #     _print('[Save best model] ./output/BI_dataset/bestfaceXray_' + str(best_perf) + '.pth\t')

        iter_now = iteration + config.TRAIN.EVAL_ITER
        if (iteration // config.TRAIN.EVAL_ITER) % 2 == 0:
            torch.save(nnb.module.state_dict(), './output/BI_dataset2/faceXray_' + str(iter_now) + '.pth')
            torch.save(nnc.module.state_dict(), './output/BI_dataset2/nnc' + str(iter_now) + '.pth')
            _print('[Save model] ./output/BI_dataset2/faceXray_' + str(iter_now) + '.pth\t')
            _print('[Save model] ./output/BI_dataset2/nnc' + str(iter_now) + '.pth\t')
        # lr_scheduler.step()

    # final models
    torch.save(nnb.module.state_dict(), './output/BI_dataset/faceXray.pth')
    torch.save(nnc.module.state_dict(), './output/BI_dataset/nnc.pth')
    _print('[Save the last model] ./output/BI_dataset/faceXray.pth\t')
    _print('[Save the last model] ./output/BI_dataset/nnc.pth\t')

    writer_dict['writer'].close()
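# Hedged sketch of how an iteration-driven train() can consume train_generator:
# unlike the epoch-based loops elsewhere in this file, it pulls batches with
# next(), so one call covers exactly EVAL_ITER optimizer steps and the cycle()
# wrapper above handles dataset regeneration transparently. The signature is
# taken from the call site; the batch layout and criterion arguments are
# assumptions.
def train(config, generator, nnb, nnc, criterion, optimizer, iteration,
          writer_dict, _print, lr_scheduler=None):
    nnb.train()
    nnc.train()
    for step in range(config.TRAIN.EVAL_ITER):
        images, xray_targets, labels = next(generator)  # batch layout assumed
        images = images.cuda(non_blocking=True)

        xray_pred = nnb(images)    # predicted face X-ray map
        cls_pred = nnc(xray_pred)  # real/fake classification head
        loss = criterion(xray_pred, cls_pred, xray_targets.cuda(), labels.cuda())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if lr_scheduler is not None:
            lr_scheduler.step()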
def main():
    args = parse_args()

    # cudnn related settings
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    model = eval('models.' + config.MODEL.NAME + '.get_nnb')(config)

    writer_dict = {
        'writer': SummaryWriter(log_dir='./output/facexray/tensorboard/tensorboard'
                                + '_' + datetime.now().strftime('%Y%m%d_%H%M%S')),
        'train_global_steps': 0,
        'valid_global_steps': 0,
    }

    # log init
    save_dir = os.path.join('./output/facexray/log/log' + '_' + datetime.now().strftime('%Y%m%d_%H%M%S'))
    if os.path.exists(save_dir):
        raise NameError('model dir exists!')
    os.makedirs(save_dir)
    logging = init_log(save_dir)
    _print = logging.info

    gpus = list(config.GPUS)
    model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    # define loss function (criterion) and optimizer
    criterion = Loss()
    optimizer = get_optimizer(config, model)

    last_epoch = config.TRAIN.BEGIN_EPOCH
    best_perf = 0.0
    if isinstance(config.TRAIN.LR_STEP, list):
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR, last_epoch - 1)
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR, last_epoch - 1)

    # Data loading code
    # list_name is not configured separately in the .yaml file
    # the transform cannot handle other input sizes yet; inputs should be made [256, 256, 3]
    train_dataset = eval('dataset.' + config.DATASET.DATASET + '.' + config.DATASET.DATASET)(
        config.DATASET.ROOT, config.DATASET.TRAIN_SET, None,
        transforms.Compose([transforms.Resize(256), transforms.ToTensor()]))
    valid_dataset = eval('dataset.' + config.DATASET.DATASET + '.' + config.DATASET.DATASET)(
        config.DATASET.ROOT, config.DATASET.TEST_SET, None,
        transforms.Compose([transforms.Resize(256), transforms.ToTensor()]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU * len(gpus),
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE_PER_GPU * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY)

    for epoch in range(last_epoch, config.TRAIN.END_EPOCH):
        # the pretrained HRNet layers stay frozen for the early epochs;
        # from epoch 25000 on, all parameters are trained
        if epoch == 25000:
            for k, v in model.named_parameters():
                v.requires_grad = True

        # train for one epoch
        train(config, train_loader, model, criterion, optimizer, epoch, writer_dict, _print)
        # evaluate on validation set
        perf_indicator = validate(config, valid_loader, model, criterion, writer_dict, _print)

        # save the best model so far
        # if perf_indicator > best_perf:
        #     best_perf = perf_indicator
        #     torch.save(model.module.state_dict(), './output/BI_dataset/bestfaceXray_' + str(best_perf) + '.pth')
        #     _print('[Save best model] ./output/BI_dataset/bestfaceXray_' + str(best_perf) + '.pth\t')

        if epoch % 25000 == 0:
            torch.save(model.module.state_dict(), './output/BI_dataset/faceXray_' + str(epoch) + '.pth')
            _print('[Save model] ./output/BI_dataset/faceXray_' + str(epoch) + '.pth\t')

        # step the scheduler after this epoch's optimizer updates
        lr_scheduler.step()

    # final model
    torch.save(model.module.state_dict(), './output/BI_dataset/faceXray.pth')
    _print('[Save the last model] ./output/BI_dataset/faceXray.pth\t')

    writer_dict['writer'].close()
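# Hedged sketch of the validate() helper, reconstructed from its call sites:
# it returns a scalar perf_indicator used for best-model bookkeeping. The
# batch layout and the metric (mean loss, negated so higher is better) are
# assumptions, not the repo's actual evaluation code.
def validate(config, valid_loader, model, criterion, writer_dict, _print=None):
    model.eval()
    total_loss, n_batches = 0.0, 0
    with torch.no_grad():
        for images, targets in valid_loader:  # batch layout assumed
            images = images.cuda(non_blocking=True)
            targets = targets.cuda(non_blocking=True)
            total_loss += criterion(model(images), targets).item()
            n_batches += 1

    mean_loss = total_loss / max(n_batches, 1)
    writer = writer_dict['writer']
    global_steps = writer_dict['valid_global_steps']
    writer.add_scalar('valid_loss', mean_loss, global_steps)
    writer_dict['valid_global_steps'] = global_steps + 1
    return -mean_loss  # higher is better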
def main():
    args = parse_args()

    # cudnn related settings
    cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    nnb = models.nnb.get_nnb(config)  # parameters not frozen; TODO: add parameters to the optimizer mid-training
    # nnb = models.ae.get_ae()
    # nnb = models.fcn.get_fcn(config)
    # make nnc's softmax a no-op during training
    nnc = models.nnc.get_nnc(config)

    writer_dict = {
        'writer': SummaryWriter(log_dir='./output/facexray/tensorboard/tensorboard'
                                + '_' + datetime.now().strftime('%Y%m%d_%H%M%S')),
        'train_global_steps': 0,
        'valid_global_steps': 0,
        'test_global_steps': 0,
    }

    # log init
    save_dir = os.path.join('./output/facexray/log/log' + '_' + datetime.now().strftime('%Y%m%d_%H%M%S'))
    if os.path.exists(save_dir):
        raise NameError('model dir exists!')
    os.makedirs(save_dir)
    logging = init_log(save_dir)
    _print = logging.info

    gpus = list(config.GPUS)
    nnb = torch.nn.DataParallel(nnb, device_ids=gpus).cuda()
    nnc = torch.nn.DataParallel(nnc, device_ids=gpus).cuda()

    # define loss function (criterion) and optimizer
    criterion = Loss()

    # initialize the optimizer; train all parameters except nnb's original HRNet parameters
    optimizer = get_optimizer(config, [nnb, nnc])  # TODO: for now, simply register all parameters

    last_epoch = config.TRAIN.BEGIN_EPOCH
    best_perf = 0.0
    if isinstance(config.TRAIN.LR_STEP, list):
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR, last_epoch - 1)
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, config.TRAIN.LR_STEP, config.TRAIN.LR_FACTOR, last_epoch - 1)

    # Data loading code
    # the transform cannot handle other input sizes yet; inputs should be made [256, 256, 3]
    train_dataset = eval('dataset.' + config.DATASET.DATASET + '.' + config.DATASET.DATASET)(
        root=config.DATASET.TRAIN_ROOT, list_name=config.DATASET.TRAIN_LIST,
        mode='train', Transform='strong_pixel')
    valid_dataset = eval('dataset.' + config.DATASET.DATASET + '.' + config.DATASET.DATASET)(
        root=config.DATASET.VALID_ROOT, list_name=config.DATASET.VALID_LIST,
        mode='valid', Transform='easy')
    test_dataset = eval('dataset.' + config.DATASET.DATASET + '.' + config.DATASET.DATASET)(
        root=config.DATASET.TEST_ROOT, list_name=config.DATASET.TEST_LIST,
        mode='test', Transform='easy')

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN.BATCH_SIZE_PER_GPU * len(gpus),
        shuffle=config.TRAIN.SHUFFLE,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.TEST.BATCH_SIZE_PER_GPU * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=config.TEST.BATCH_SIZE_PER_GPU * len(gpus),
        shuffle=False,
        num_workers=config.WORKERS,
        pin_memory=config.PIN_MEMORY)

    for epoch in range(last_epoch, config.TRAIN.END_EPOCH):
        # keep the pretrained HRNet layers frozen for the early epochs, then
        # train all parameters; warm-up is ignored for now
        # if epoch == 25000:
        #     for k, v in nnb.named_parameters():
        #         v.requires_grad = True

        # train for one epoch
        train(config, train_loader, nnb, nnc, criterion, optimizer, epoch, writer_dict, _print)
        # evaluate on validation set
        perf_indicator = validate(config, valid_loader, nnb, nnc, criterion, writer_dict, _print)
        test(config, test_loader, nnb, nnc, criterion, writer_dict, _print)

        # save the best model so far
        # if perf_indicator > best_perf:
        #     best_perf = perf_indicator
        #     torch.save(model.module.state_dict(), './output/BI_dataset/bestfaceXray_' + str(best_perf) + '.pth')
        #     _print('[Save best model] ./output/BI_dataset/bestfaceXray_' + str(best_perf) + '.pth\t')

        if epoch % 2 == 0:
            torch.save(nnb.module.state_dict(), './output/BI_dataset2/faceXray_' + str(epoch) + '.pth')
            torch.save(nnc.module.state_dict(), './output/BI_dataset2/nnc' + str(epoch) + '.pth')
            _print('[Save model] ./output/BI_dataset2/faceXray_' + str(epoch) + '.pth\t')
            _print('[Save model] ./output/BI_dataset2/nnc' + str(epoch) + '.pth\t')

        lr_scheduler.step()

    # final models
    torch.save(nnb.module.state_dict(), './output/BI_dataset/faceXray.pth')
    torch.save(nnc.module.state_dict(), './output/BI_dataset/nnc.pth')
    _print('[Save the last model] ./output/BI_dataset/faceXray.pth\t')
    _print('[Save the last model] ./output/BI_dataset/nnc.pth\t')

    writer_dict['writer'].close()
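# Hedged sketch of a get_optimizer() compatible with both call styles above
# (a single model or the [nnb, nnc] list): it registers only parameters with
# requires_grad=True, so frozen pretrained HRNet weights are skipped at
# construction time, which is also why the TODO above notes that unfrozen
# parameters would need to be added to the optimizer mid-training. The
# optimizer type and the config.TRAIN.LR field are assumptions.
def get_optimizer(config, models):
    if not isinstance(models, (list, tuple)):
        models = [models]
    params = [p for m in models for p in m.parameters() if p.requires_grad]
    return torch.optim.Adam(params, lr=config.TRAIN.LR)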
# allocate GPU memory on demand rather than all at once
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

global_steps = tf.Variable(0, trainable=False, dtype=tf.int64)

trainset = Dataset('train')
model = RPNplus()
model.build(input_shape=(None, image_height, image_width, 3))
model.summary()

# TensorBoard
if os.path.exists(logdir):
    shutil.rmtree(logdir)
writer = tf.summary.create_file_writer(logdir)

optimizer = tf.keras.optimizers.Adam(learning_rate=cfg.TRAIN.LR)
loss = Loss()

def train_step(images, target_scores, target_bboxes, target_masks, epoch):
    with tf.GradientTape() as tape:
        pred_scores, pred_bboxes = model(images, training=True)
        # compute loss; lambda_scale balances the box regression term
        score_loss, boxes_loss = loss.compute_loss(
            target_scores, target_bboxes, target_masks, pred_scores, pred_bboxes)
        total_loss = score_loss + lambda_scale * boxes_loss
    # gradient update
    gradients = tape.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
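# Hedged sketch of the outer loop that would drive train_step: iterate the
# Dataset for a fixed number of epochs and checkpoint the weights after each
# one. EPOCHS, the batch tuple layout yielded by trainset, and the save path
# are assumptions; save_weights and assign_add are standard TF2/Keras APIs.
EPOCHS = 30  # assumed
for epoch in range(EPOCHS):
    for images, target_scores, target_bboxes, target_masks in trainset:
        train_step(images, target_scores, target_bboxes, target_masks, epoch)
        global_steps.assign_add(1)
    model.save_weights('./RPN.h5')  # path assumed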
def eval_step(model, x_test, y_test):
    # forward pass only; no gradient tape is needed for evaluation
    y_pred = model(x_test, training=False)
    loss = Loss()(y_test, y_pred)
    return loss
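# Usage sketch: average eval_step over a validation set. The tf.data pipeline
# and shapes here are placeholders matching the toy model above, not the
# repo's actual evaluation data.
val_ds = tf.data.Dataset.from_tensor_slices(
    (tf.random.normal([32, 4]), tf.random.normal([32, 2]))).batch(8)
losses = [eval_step(model, x, y) for x, y in val_ds]
mean_val_loss = tf.reduce_mean(losses)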