def train(cfg, network):
    """Train `network` according to `cfg`, checkpointing and evaluating periodically.

    Args:
        cfg: project config node (provides loaders, epochs, save/eval cadence).
        network: the model to optimize (modified in place).

    Returns:
        The trained network.
    """
    # The 'file_system' strategy avoids "too many open files" errors when
    # DataLoader workers exchange many tensors via the default fd strategy.
    # (Skipped for Cityscapes-style datasets — presumably they hit a known
    # issue with this strategy; TODO confirm.)
    if cfg.train.dataset[:4] != 'City':
        torch.multiprocessing.set_sharing_strategy('file_system')

    train_loader = make_data_loader(cfg, is_train=True, max_iter=cfg.ep_iter)
    val_loader = make_data_loader(cfg, is_train=False)

    trainer = make_trainer(cfg, network)
    optimizer = make_optimizer(cfg, network)
    scheduler = make_lr_scheduler(cfg, optimizer)
    recorder = make_recorder(cfg)
    evaluator = make_evaluator(cfg)

    # Resumes from the latest checkpoint in cfg.model_dir when cfg.resume is
    # set; returns the epoch to continue from (0 on a fresh run).
    begin_epoch = load_model(network, optimizer, scheduler, recorder,
                             cfg.model_dir, resume=cfg.resume)

    for epoch in range(begin_epoch, cfg.train.epoch):
        recorder.epoch = epoch
        trainer.train(epoch, train_loader, optimizer, recorder)
        scheduler.step()  # learning-rate schedule advances once per epoch

        if (epoch + 1) % cfg.save_ep == 0:
            save_model(network, optimizer, scheduler, recorder, epoch,
                       cfg.model_dir)

        if (epoch + 1) % cfg.eval_ep == 0:
            trainer.val(epoch, val_loader, evaluator, recorder)

    return network
def run_evaluate():
    """Evaluate the checkpointed network on the test split and print a summary."""
    from lib.datasets import make_data_loader
    from lib.evaluators import make_evaluator
    import tqdm
    import torch
    from lib.networks import make_network
    from lib.utils.net_utils import load_network

    if DEBUG:
        print(
            '-------------------------------Evaluating---------------------------------'
        )

    # Build the model, restore the requested checkpoint, and freeze it.
    net = make_network(cfg).cuda()
    load_network(net, cfg.model_dir, epoch=cfg.test.epoch)
    net.eval()

    loader = make_data_loader(cfg, is_train=False)
    evaluator = make_evaluator(cfg)

    for batch in tqdm.tqdm(loader):
        image = batch['inp'].cuda()
        with torch.no_grad():
            prediction = net(image)
        evaluator.evaluate(prediction, batch)

    evaluator.summarize()
def train(cfg, network):
    """Run the standard training loop for `network` under `cfg`.

    Checkpoints every `cfg.save_ep` epochs and validates every `cfg.eval_ep`
    epochs; returns the trained network.
    """
    trainer = make_trainer(cfg, network)
    optimizer = make_optimizer(cfg, network)
    scheduler = make_lr_scheduler(cfg, optimizer)
    recorder = make_recorder(cfg)
    evaluator = make_evaluator(cfg)

    # Pick up from the last saved epoch when resuming; 0 otherwise.
    begin_epoch = load_model(network, optimizer, scheduler, recorder,
                             cfg.model_dir, resume=cfg.resume)

    loader_train = make_data_loader(cfg, is_train=True)
    loader_val = make_data_loader(cfg, is_train=False)

    for epoch in range(begin_epoch, cfg.train.epoch):
        recorder.epoch = epoch
        trainer.train(epoch, loader_train, optimizer, recorder)
        scheduler.step()

        epoch_done = epoch + 1
        if epoch_done % cfg.save_ep == 0:
            save_model(network, optimizer, scheduler, recorder, epoch,
                       cfg.model_dir)
        if epoch_done % cfg.eval_ep == 0:
            trainer.val(epoch, loader_val, evaluator, recorder)

    return network
def run_evaluate():
    """Render the test split with the trained model and report evaluator metrics."""
    from lib.datasets import make_data_loader
    from lib.evaluators import make_evaluator
    import tqdm
    import torch
    from lib.networks import make_network
    from lib.utils import net_utils
    from lib.networks.renderer import make_renderer

    # Disable ray perturbation so rendering is deterministic at test time.
    cfg.perturb = 0

    network = make_network(cfg).cuda()
    net_utils.load_network(network,
                           cfg.trained_model_dir,
                           resume=cfg.resume,
                           epoch=cfg.test.epoch)
    # NOTE(review): the network is left in train() mode during evaluation
    # rather than eval(). This mirrors some upstream NeRF-style code, but
    # confirm it is intentional and that no dropout/batch-norm layers are
    # affected before relying on the metrics.
    network.train()

    data_loader = make_data_loader(cfg, is_train=False)
    renderer = make_renderer(cfg, network)
    evaluator = make_evaluator(cfg)

    for batch in tqdm.tqdm(data_loader):
        # Move every tensor entry to the GPU; 'meta' holds non-tensor
        # bookkeeping and must stay on the CPU.
        for k in batch:
            if k != 'meta':
                batch[k] = batch[k].cuda()
        with torch.no_grad():
            output = renderer.render(batch)
        evaluator.evaluate(output, batch)

    evaluator.summarize()
def run_evaluate():
    """Evaluate the checkpointed network; COCO datasets use the trainer's own path."""
    from lib.datasets import make_data_loader
    from lib.evaluators import make_evaluator
    import tqdm
    import torch
    from lib.networks import make_network
    from lib.utils.net_utils import load_network
    from lib.train import make_trainer

    net = make_network(cfg).cuda()
    load_network(net, cfg.model_dir, epoch=cfg.test.epoch)
    trainer = make_trainer(cfg, net)
    net.eval()

    loader = make_data_loader(cfg, is_train=False)

    # COCO-style datasets are scored through the trainer's dedicated routine;
    # everything else runs the generic evaluate/summarize loop.
    if 'Coco' in cfg.train.dataset:
        trainer.val_coco(loader)
        return

    evaluator = make_evaluator(cfg)
    for batch in tqdm.tqdm(loader):
        image = batch['inp'].cuda()
        with torch.no_grad():
            prediction = net(image)
        evaluator.evaluate(prediction, batch)
    evaluator.summarize()
def test(cfg, network):
    """Load the requested checkpoint into `network` and run one validation pass."""
    trainer = make_trainer(cfg, network)
    loader = make_data_loader(cfg, is_train=False)
    evaluator = make_evaluator(cfg)
    # load_network returns the epoch of the restored checkpoint.
    epoch = load_network(network, cfg.model_dir,
                         resume=cfg.resume, epoch=cfg.test.epoch)
    trainer.val(epoch, loader, evaluator)
def train(cfg, network):
    """Train `network` with optional Neptune experiment logging.

    COCO-style datasets are validated via `trainer.val_coco`; all other
    datasets go through an evaluator from `make_evaluator`.

    Returns:
        The trained network.
    """
    # The 'file_system' strategy avoids fd exhaustion when DataLoader workers
    # exchange many tensors (skipped for Cityscapes-style datasets).
    if cfg.train.dataset[:4] != 'City':
        torch.multiprocessing.set_sharing_strategy('file_system')

    trainer = make_trainer(cfg, network)
    optimizer = make_optimizer(cfg, network)
    scheduler = make_lr_scheduler(cfg, optimizer)
    recorder = make_recorder(cfg)

    # COCO validation never touches the evaluator; initialize to None so the
    # name is always bound regardless of dataset.
    evaluator = None
    if 'Coco' not in cfg.train.dataset:
        evaluator = make_evaluator(cfg)

    begin_epoch = load_model(network, optimizer, scheduler, recorder,
                             cfg.model_dir, resume=cfg.resume)

    train_loader = make_data_loader(cfg, is_train=True)
    val_loader = make_data_loader(cfg, is_train=False)

    # Neptune logging is optional; global_steps carries step counters that
    # the trainer advances when logging is enabled.
    global_steps = None
    if cfg.neptune:
        global_steps = {
            'train_global_steps': 0,
            'valid_global_steps': 0,
        }
        neptune.init('hccccccccc/clean-pvnet')
        neptune.create_experiment(cfg.model_dir.split('/')[-1])
        neptune.append_tag('pose')

    for epoch in range(begin_epoch, cfg.train.epoch):
        recorder.epoch = epoch
        trainer.train(epoch, train_loader, optimizer, recorder, global_steps)
        scheduler.step()

        if (epoch + 1) % cfg.save_ep == 0:
            save_model(network, optimizer, scheduler, recorder, epoch,
                       cfg.model_dir)

        if (epoch + 1) % cfg.eval_ep == 0:
            if 'Coco' in cfg.train.dataset:
                trainer.val_coco(val_loader, global_steps)
            else:
                trainer.val(epoch, val_loader, evaluator, recorder)

    if cfg.neptune:
        neptune.stop()
    return network
def run_evaluate_nv():
    """Run the evaluator directly over the test loader (no network forward pass).

    Each batch is moved to the GPU (except the non-tensor 'meta' entry) and
    handed straight to the evaluator, which summarizes at the end.
    """
    from lib.datasets import make_data_loader
    from lib.evaluators import make_evaluator
    import tqdm
    # NOTE: the original also imported lib.utils.net_utils here but never
    # used it; the unused import has been removed.

    data_loader = make_data_loader(cfg, is_train=False)
    evaluator = make_evaluator(cfg)

    for batch in tqdm.tqdm(data_loader):
        # 'meta' holds non-tensor bookkeeping and must stay on the CPU.
        for k in batch:
            if k != 'meta':
                batch[k] = batch[k].cuda()
        evaluator.evaluate(batch)

    evaluator.summarize()
def train(cfg, network):
    """Standard training loop: train each epoch, step the LR schedule,
    checkpoint every `cfg.save_ep` epochs and validate every `cfg.eval_ep`.

    Returns:
        The trained network.
    """
    trainer = make_trainer(cfg, network)
    optimizer = make_optimizer(cfg, network)
    scheduler = make_lr_scheduler(cfg, optimizer)
    recorder = make_recorder(cfg)
    evaluator = make_evaluator(cfg)

    # Resumes from cfg.model_dir when cfg.resume is set; returns the epoch to
    # continue from (do NOT hard-code this to 0, or resuming breaks).
    begin_epoch = load_model(network, optimizer, scheduler, recorder,
                             cfg.model_dir, resume=cfg.resume)

    # Data is actually read starting here.
    train_loader = make_data_loader(cfg, is_train=True)
    val_loader = make_data_loader(cfg, is_train=False)

    for epoch in range(begin_epoch, cfg.train.epoch):
        recorder.epoch = epoch
        trainer.train(epoch, train_loader, optimizer, recorder)
        # optimizer.step() (inside trainer.train) updates the model;
        # scheduler.step() only adjusts the learning rate.
        scheduler.step()

        if (epoch + 1) % cfg.save_ep == 0:
            save_model(network, optimizer, scheduler, recorder, epoch,
                       cfg.model_dir)

        if (epoch + 1) % cfg.eval_ep == 0:
            trainer.val(epoch, val_loader, evaluator, recorder)

    return network
def run_evaluate():
    """Evaluate the checkpointed network, optionally dumping per-sample
    visualizations, and persist metrics via MetricMonitor.

    Side effects: creates `args.vis_out` (if requested), writes
    "metrics.pkl" and "evaluation.html" to the working directory.
    """
    from lib.datasets import make_data_loader
    from lib.evaluators import make_evaluator
    import tqdm
    import torch
    import pickle
    import lzma
    from lib.networks import make_network
    from lib.utils.net_utils import load_network
    from lib.evaluators.custom.monitor import MetricMonitor
    from lib.visualizers import make_visualizer

    # Fix the torch seed so any stochastic ops are reproducible run-to-run.
    torch.manual_seed(0)
    monitor = MetricMonitor()

    network = make_network(cfg).cuda()
    epoch = load_network(network, cfg.model_dir, epoch=cfg.test.epoch)
    network.eval()
    print("Trainable parameters: {}".format(
        sum(p.numel() for p in network.parameters())))

    data_loader = make_data_loader(cfg, is_train=False)
    evaluator = make_evaluator(cfg)
    visualizer = make_visualizer(cfg)

    if args.vis_out:
        # makedirs(..., exist_ok=True) instead of os.mkdir: re-running the
        # evaluation must not crash because the output directory exists.
        os.makedirs(args.vis_out, exist_ok=True)

    # enumerate(start=1) reproduces the original manual counter, which was
    # incremented before first use.
    for idx, batch in enumerate(tqdm.tqdm(data_loader), start=1):
        inp = batch['inp'].cuda()
        with torch.no_grad():
            output = network(inp)
        evaluator.evaluate(output, batch)

        if args.vis_out:
            # Last recorded 3D drill-tip translation error names the image.
            err = evaluator.data["obj_drilltip_trans_3d"][-1]
            visualizer.visualize(
                output, batch,
                os.path.join(
                    args.vis_out,
                    "tiperr{:.4f}_idx{:04d}.png".format(err.item(), idx)))

    result = evaluator.summarize()
    monitor.add('val', epoch, result)
    monitor.save_metrics("metrics.pkl")
    monitor.plot_histogram("evaluation.html", plotly=True)
def train(cfg, network):
    """Distributed-aware training loop.

    Checkpoints on rank 0 only ('latest' snapshots plus periodic ones) and
    re-seeds the distributed sampler each epoch. Returns the trained network.
    """
    trainer = make_trainer(cfg, network)
    optimizer = make_optimizer(cfg, network)
    scheduler = make_lr_scheduler(cfg, optimizer)
    recorder = make_recorder(cfg)
    evaluator = make_evaluator(cfg)

    begin_epoch = load_model(network, optimizer, scheduler, recorder,
                             cfg.trained_model_dir, resume=cfg.resume)
    set_lr_scheduler(cfg, scheduler)

    loader_train = make_data_loader(cfg,
                                    is_train=True,
                                    is_distributed=cfg.distributed,
                                    max_iter=cfg.ep_iter)
    loader_val = make_data_loader(cfg, is_train=False)

    for epoch in range(begin_epoch, cfg.train.epoch):
        recorder.epoch = epoch
        if cfg.distributed:
            # Re-seed the sampler so each epoch sees a different shard order.
            loader_train.batch_sampler.sampler.set_epoch(epoch)

        trainer.train(epoch, loader_train, optimizer, recorder)
        scheduler.step()

        is_rank0 = cfg.local_rank == 0
        done = epoch + 1
        if done % cfg.save_ep == 0 and is_rank0:
            save_model(network, optimizer, scheduler, recorder,
                       cfg.trained_model_dir, epoch)
        if done % cfg.save_latest_ep == 0 and is_rank0:
            save_model(network, optimizer, scheduler, recorder,
                       cfg.trained_model_dir, epoch, last=True)
        if done % cfg.eval_ep == 0:
            trainer.val(epoch, loader_val, evaluator, recorder)

    return network
def run_evaluate():
    """Deterministically evaluate the checkpointed network on up to 1200 batches.

    Seeds numpy/torch and forces cuDNN into deterministic mode so repeated
    runs produce identical metrics.
    """
    from lib.datasets import make_data_loader
    from lib.evaluators import make_evaluator
    import tqdm
    import torch
    from lib.networks import make_network
    from lib.utils.net_utils import load_network
    import numpy as np

    # Full determinism: fixed seeds + deterministic cuDNN kernels.
    np.random.seed(1000)
    torch.manual_seed(0)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    network = make_network(cfg).cuda()
    load_network(network, cfg.model_dir, epoch=cfg.test.epoch)
    network.eval()

    data_loader = make_data_loader(cfg, is_train=False)
    evaluator = make_evaluator(cfg)

    # Cap the evaluation at a fixed number of batches. (The original also
    # moved batch['mask'] to the GPU into an unused local; removed.)
    max_batches = 1200
    for i, batch in enumerate(tqdm.tqdm(data_loader)):
        if i == max_batches:
            break
        inp = batch['inp'].cuda()
        with torch.no_grad():
            output = network(inp)
        evaluator.evaluate(output, batch)

    evaluator.summarize()