def test():
    engine = Engine()
    # define test dataset
    data_config = getDatasetConfig(config.dataset_tag)
    test_dataset = CustomDataset(data_config['val'], data_config['val_root'],
                                 False)
    test_loader = DataLoader(test_dataset,
                             batch_size=config.batch_size,
                             shuffle=False,
                             num_workers=config.workers,
                             pin_memory=True)
    # define model
    net = init_model(pretrained=True,
                     model_name=config.model_name,
                     class_num=config.class_num)
    # load checkpoint
    use_gpu = torch.cuda.is_available() and config.use_gpu
    if use_gpu:
        net = net.cuda()
        gpu_ids = [int(r) for r in config.gpu_ids.split(',')]
    if use_gpu and len(gpu_ids) > 1:
        net = torch.nn.DataParallel(net, device_ids=gpu_ids)
    ckpt = torch.load(config.model_load_path)
    # strip any 'module.' prefix left over from DataParallel training
    net.load_state_dict(
        {k.replace('module.', ''): v for k, v in ckpt['state_dict'].items()})
    # define loss
    criterion = torch.nn.CrossEntropyLoss()
    if use_gpu:
        criterion = criterion.cuda()
    prec1, prec5 = engine.test(test_loader, net, criterion)
def validate():
    engine = Engine()
    config = getConfig()
    device = torch.device("cuda:" + str(config.device))
    # define dataset
    transform_test = transforms.Compose([
        transforms.Resize((config.image_size, config.image_size)),
        # transforms.CenterCrop(config.input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    val_dataset = CustomDataset('data/movie_val.csv',
                                'data/movie/images',
                                transform=transform_test)
    val_loader = DataLoader(val_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            num_workers=config.workers,
                            pin_memory=True)
    net = resnet101(pretrained=True, use_bap=False)
    in_features = net.fc_new.in_features
    new_linear = torch.nn.Linear(in_features=in_features, out_features=25)
    net.fc_new = new_linear
    # load checkpoint
    use_gpu = torch.cuda.is_available() and config.use_gpu
    if use_gpu:
        net = net.to(device)
        gpu_ids = [int(r) for r in config.gpu_ids.split(',')]
    if use_gpu and config.multi_gpu:
        net = torch.nn.DataParallel(net, device_ids=gpu_ids)
    checkpoint_path = os.path.join(config.checkpoint_path,
                                   'model_best.pth.tar')
    load_state_dict = torch.load(checkpoint_path,
                                 map_location=device)['state_dict']
    new_state_dict = {}
    for key, value in load_state_dict.items():
        new_key = key.replace('module.', '')
        new_state_dict[new_key] = value
    net.load_state_dict(new_state_dict)
    # define loss
    criterion = torch.nn.BCEWithLogitsLoss()
    if use_gpu:
        criterion = criterion.cuda()
    state = {
        'model': net,
        'val_loader': val_loader,
        'criterion': criterion,
        'config': config,
        'device': device,
        'step': 0,
        'lr': config.lr
    }
    prec1, fprec, val_loss = engine.validate(state)
    print(prec1)
def test():
    engine = Engine()
    config = getConfig()
    data_config = getDatasetConfig(config.dataset)
    # define dataset
    transform_test = transforms.Compose([
        transforms.Resize((config.image_size, config.image_size)),
        transforms.CenterCrop(config.input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    val_dataset = CustomDataset(data_config['val'],
                                data_config['val_root'],
                                transform=transform_test)
    val_loader = DataLoader(val_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            num_workers=config.workers,
                            pin_memory=True)
    # define model
    if config.model_name == 'inception':
        net = inception_v3_bap(pretrained=True, aux_logits=False)
    elif config.model_name == 'resnet50':
        net = resnet50(pretrained=True)
    in_features = net.fc_new.in_features
    new_linear = torch.nn.Linear(in_features=in_features,
                                 out_features=val_dataset.num_classes)
    net.fc_new = new_linear
    # load checkpoint
    use_gpu = torch.cuda.is_available() and config.use_gpu
    if use_gpu:
        net = net.cuda()
        gpu_ids = [int(r) for r in config.gpu_ids.split(',')]
    if use_gpu and len(gpu_ids) > 1:
        net = torch.nn.DataParallel(net, device_ids=gpu_ids)
    # checkpoint_path = os.path.join(config.checkpoint_path, 'model_best.pth.tar')
    net.load_state_dict(torch.load(config.checkpoint_path)['state_dict'])
    # define loss
    criterion = torch.nn.CrossEntropyLoss()
    if use_gpu:
        criterion = criterion.cuda()
    prec1, prec5 = engine.test(val_loader, net, criterion)
def train():
    # input params
    config = getConfig()
    torch.manual_seed(GLOBAL_SEED)
    torch.cuda.manual_seed(GLOBAL_SEED)
    torch.backends.cudnn.deterministic = True
    # benchmark mode selects algorithms non-deterministically, which would
    # defeat the seeding above, so keep it off
    torch.backends.cudnn.benchmark = False
    torch.cuda.set_device(config.device)
    rate = 0.875
    device = torch.device("cuda:" + str(config.device))
    # define train_dataset and loader
    transform_train = transforms.Compose([
        transforms.Resize(
            (int(config.input_size // rate), int(config.input_size // rate))),
        transforms.RandomCrop((config.input_size, config.input_size)),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=32. / 255., saturation=0.5),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    train_dataset = CustomDataset('data/movie_train.csv',
                                  'data/movie/images',
                                  transform=transform_train)
    train_loader = DataLoader(train_dataset,
                              batch_size=config.batch_size,
                              shuffle=True,
                              num_workers=config.workers,
                              pin_memory=True,
                              worker_init_fn=_init_fn)
    transform_test = transforms.Compose([
        transforms.Resize((config.image_size, config.image_size)),
        transforms.CenterCrop(config.input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    val_dataset = CustomDataset('data/movie_val.csv',
                                'data/movie/images',
                                transform=transform_test)
    val_loader = DataLoader(val_dataset,
                            batch_size=config.batch_size * 2,
                            shuffle=False,
                            num_workers=config.workers,
                            pin_memory=True,
                            worker_init_fn=_init_fn)
    # logging dataset info
    print('Train:[{train_num}], Val:[{val_num}]'.format(
        train_num=len(train_dataset), val_num=len(val_dataset)))
    print('Batch Size:[{0}], Total::: Train Batches:[{1}], Val Batches:[{2}]'.
          format(config.batch_size, len(train_loader), len(val_loader)))
    net = resnet101(pretrained=True, use_bap=False)
    in_features = net.fc_new.in_features
    new_linear = torch.nn.Linear(in_features=in_features,
                                 out_features=train_dataset.num_classes)
    net.fc_new = new_linear
    # gpu config
    use_gpu = torch.cuda.is_available() and config.use_gpu
    if use_gpu:
        net = net.to(device)
        gpu_ids = [int(r) for r in config.gpu_ids.split(',')]
    if use_gpu and config.multi_gpu:
        net = torch.nn.DataParallel(net, device_ids=gpu_ids)
    # define optimizer
    assert config.optim in ['sgd', 'adam'], 'optim name not found!'
    if config.optim == 'sgd':
        optimizer = torch.optim.SGD(net.parameters(),
                                    lr=config.lr,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay)
    elif config.optim == 'adam':
        optimizer = torch.optim.Adam(net.parameters(),
                                     lr=config.lr,
                                     weight_decay=config.weight_decay)
    # define learning scheduler
    # note: 'cosine_annealing' passes the assert below, but no branch creates
    # a scheduler for it, so that choice currently runs with a fixed lr
    assert config.scheduler in ['plateau', 'step', 'cosine_annealing'
                                ], 'scheduler not supported!!!'
    if config.scheduler == 'plateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               'min',
                                                               patience=3,
                                                               factor=0.1)
    elif config.scheduler == 'step':
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                    step_size=2,
                                                    gamma=0.9)
    # define loss
    criterion = torch.nn.BCEWithLogitsLoss()
    # train val parameters dict
    state = {
        'model': net,
        'train_loader': train_loader,
        'val_loader': val_loader,
        'criterion': criterion,
        'config': config,
        'optimizer': optimizer,
        'device': device,
        'step': 0,
        'lr': config.lr
    }
    ## train and val
    engine = Engine()
    print(config)
    best_prec = 0.
    for e in range(config.epochs):
        if config.scheduler == 'step':
            scheduler.step()
        lr_val = get_lr(optimizer)
        print("Start epoch %d ==========, lr=%f" % (e, lr_val))
        train_prec, train_loss = engine.train(state, e)
        prec, val_loss = engine.validate(state)
        is_best = prec > best_prec
        best_prec = max(prec, best_prec)
        print('Epoch: {}, Train-Loss: {:.4f}, Train-accuracy: {:.4f}, '
              'Test-accuracy: {:.4f}'.format(e + 1, train_loss, train_prec,
                                             prec))
        print('Best accuracy: {:.4f}'.format(best_prec))
        save_checkpoint(
            {
                'epoch': e + 1,
                'state_dict': net.state_dict(),
                # save the running best, not a stale pre-loop value
                'best_prec1': best_prec,
                'optimizer': optimizer.state_dict(),
            }, is_best, config.output)
        if config.scheduler == 'plateau':
            scheduler.step(val_loss)
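# get_lr is called above but not defined in these snippets. A minimal sketch,
# assuming the usual helper that reads the current learning rate from the
# optimizer's first param group (the projects' actual helper may differ):
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']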
configs = model.configs

#%% augmentation dataset and dataloader
if args.is_tta:
    # test-time augmentation: reuse the (stochastic) training transform
    test_transform = TrainAugmentation_albu(size=configs.image_size,
                                            mean=configs.image_mean,
                                            std=configs.image_std)
else:
    test_transform = TestAugmentation_albu(size=configs.image_size,
                                           mean=configs.image_mean,
                                           std=configs.image_std)
if args.is_valid:
    dataset = CustomDataset(root=args.datasets,
                            transform=test_transform,
                            label_name=configs.dict_label)
else:
    dataset = CustomDataset(root=args.datasets,
                            transform=test_transform,
                            label_name=configs.dict_label,
                            is_test=True)
test_loader = DataLoader(dataset,
                         batch_size=2 * args.batch_size,
                         num_workers=args.num_workers,
                         shuffle=False)

#%% net to DEVICE
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
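# The *_albu helpers above are not defined in this snippet. A minimal sketch
# of what TestAugmentation_albu might wrap, assuming the albumentations
# library; the composition here is an assumption, not the project's code:
import albumentations as A
from albumentations.pytorch import ToTensorV2

def TestAugmentation_albu(size, mean, std):
    # deterministic eval-time pipeline: resize, normalize, convert to tensor
    return A.Compose([
        A.Resize(size, size),
        A.Normalize(mean=mean, std=std),
        ToTensorV2(),
    ])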
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--test_mode",
        "-t",
        type=str,
        default="run_inference",
        choices=['generate_train', 'generate_result', 'run_inference'],
        help='Type of test. One of "generate_train": generate refineNet '
        'datasets, "generate_result": save inference result and groundtruth, '
        '"run_inference": save inference result for input images.')
    parser.add_argument(
        "--data_mode",
        "-d",
        type=str,
        default="test",
        choices=['test', 'generation'],
        help='Only used for "generate_train" test_mode, "generation" for '
        'refineNet train dataset, "test" for refineNet test dataset.')
    parser.add_argument("--SMAP_path",
                        "-p",
                        type=str,
                        default='log/SMAP.pth',
                        help='Path to SMAP model')
    parser.add_argument(
        "--RefineNet_path",
        "-rp",
        type=str,
        default='',
        help='Path to RefineNet model, empty means without RefineNet')
    parser.add_argument("--batch_size",
                        type=int,
                        default=1,
                        help='Batch_size of test')
    parser.add_argument("--do_flip",
                        type=float,
                        default=0,
                        help='Set to 1 if do flip when test')
    parser.add_argument("--dataset_path",
                        type=str,
                        default="",
                        help='Image dir path of "run_inference" test mode')
    parser.add_argument("--json_name",
                        type=str,
                        default="",
                        help='Add a suffix to the result json.')
    args = parser.parse_args()

    cfg.TEST_MODE = args.test_mode
    cfg.DATA_MODE = args.data_mode
    cfg.REFINE = len(args.RefineNet_path) > 0
    cfg.DO_FLIP = args.do_flip
    cfg.JSON_SUFFIX_NAME = args.json_name
    cfg.TEST.IMG_PER_GPU = args.batch_size
    os.makedirs(cfg.TEST_DIR, exist_ok=True)
    logger = get_logger(cfg.DATASET.NAME, cfg.TEST_DIR, 0,
                        'test_log_{}.txt'.format(args.test_mode))

    model = SMAP(cfg, run_efficient=cfg.RUN_EFFICIENT)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    if args.test_mode == "run_inference":
        test_dataset = CustomDataset(cfg, args.dataset_path)
        data_loader = DataLoader(test_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False)
    else:
        data_loader = get_test_loader(cfg,
                                      num_gpu=1,
                                      local_rank=0,
                                      stage=args.data_mode)

    if cfg.REFINE:
        refine_model = RefineNet()
        refine_model.to(device)
        refine_model_file = args.RefineNet_path
    else:
        refine_model = None
        refine_model_file = ""

    model_file = args.SMAP_path
    if os.path.exists(model_file):
        state_dict = torch.load(model_file,
                                map_location=lambda storage, loc: storage)
        state_dict = state_dict['model']
        model.load_state_dict(state_dict)
        if os.path.exists(refine_model_file):
            refine_model.load_state_dict(torch.load(refine_model_file))
        elif refine_model is not None:
            logger.info("No such RefineNet checkpoint of {}".format(
                args.RefineNet_path))
            return
        generate_3d_point_pairs(model,
                                refine_model,
                                data_loader,
                                cfg,
                                logger,
                                device,
                                output_dir=os.path.join(
                                    cfg.OUTPUT_DIR, "result"))
    else:
        logger.info("No such checkpoint of SMAP {}".format(args.SMAP_path))
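# Example invocations of the CLI defined above (paths are placeholders, not
# taken from the source):
#   python test.py -t run_inference -p log/SMAP.pth \
#       --dataset_path /path/to/images --batch_size 1
# Passing -rp log/RefineNet.pth in addition enables the optional RefineNet
# second stage, since cfg.REFINE is derived from that argument being non-empty.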
def train(args):
    # --- read the setting for training
    random_seed = args.randomSeed
    torch.backends.cudnn.enabled = args.useCuda
    torch.manual_seed(random_seed)
    n_epochs = args.numEpochs
    batch_size_train = args.batchSize
    log_interval = args.logInterval
    log_valinterval = args.logValInterval
    learning_rate = args.LR
    momentum = args.Momentum

    # data_config
    data_config = {}
    data_config['split'] = 'train'
    data_config['root_dir'] = args.customDataFolder
    data_config['input_resolution'] = args.inputResolution
    data_config['use_data_aug'] = args.useDataAug
    custom_data = CustomDataset(data_config=data_config)
    data_loader = DataLoader(custom_data,
                             batch_size=batch_size_train,
                             shuffle=True,
                             num_workers=0)

    checkpoint_dir = './checkpoints'
    if not os.path.exists(checkpoint_dir):
        os.mkdir(checkpoint_dir)
    model_path = os.path.join(checkpoint_dir, args.modelName)
    if not os.path.exists(model_path):
        os.mkdir(model_path)

    network = TransferNet(num_classes=custom_data.get_num_of_classes())
    network = network.float()
    # optimizer
    optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                 network.parameters()),
                          lr=learning_rate,
                          momentum=momentum)
    # load model if specified
    device = 'gpu' if args.useCuda else 'cpu'
    if args.loadModel:
        load_model(args.modelName, model_path, network, optimizer,
                   device=device)
    if args.useCuda:
        network.cuda()
    eval_dict = test(args, network, custom_data)

    # train
    for epoch in range(n_epochs):
        train_corrects = 0
        train_losses = []
        for batch_idx, sample_batched in enumerate(data_loader):
            network.train()
            optimizer.zero_grad()
            if args.useCuda:
                sample_image = sample_batched['image'].float().cuda()
            else:
                sample_image = sample_batched['image'].float()
            output = network(sample_image)
            loss, correct = classify_loss_acc(sample_batched['label'],
                                              output,
                                              use_cuda=args.useCuda)
            train_corrects += correct
            # append once per batch; a second append inside the logging branch
            # would double-count logged batches in the epoch average
            train_losses.append(loss.item())
            loss.backward()
            optimizer.step()
            if batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss_lv1: {:.6f}'.
                      format(epoch, batch_idx * len(sample_batched['image']),
                             len(data_loader.dataset),
                             100. * batch_idx / len(data_loader),
                             loss.item()))
        if epoch % log_valinterval == 0:
            print('\nTrain set: Avg. loss: {:.4f}, '
                  'Accuracy: {}/{} ({:.0f}%)\n'.format(
                      np.mean(train_losses), train_corrects,
                      len(data_loader.dataset),
                      100. * train_corrects / len(data_loader.dataset)))
            eval_dict = test(args, network, custom_data)
        # save the model after each epoch
        torch.save(network.state_dict(), os.path.join(model_path,
                                                      'model.pth'))
        torch.save(optimizer.state_dict(),
                   os.path.join(model_path, 'optimizer.pth'))
    print("training finished")
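# classify_loss_acc is not defined in this snippet. A minimal sketch of the
# assumed behavior, inferred from how it is called above (cross-entropy loss
# plus a top-1 correct count); the project's real helper may differ:
import torch
import torch.nn.functional as F

def classify_loss_acc(labels, output, use_cuda=False):
    labels = labels.long()
    if use_cuda:
        labels = labels.cuda()
    loss = F.cross_entropy(output, labels)
    # number of samples whose argmax prediction matches the label
    correct = (output.argmax(dim=1) == labels).sum().item()
    return loss, correct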
# models
args = parse_args()
if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You might want to run with --cuda")
if not os.path.exists(args.output_dir):
    os.makedirs(args.output_dir)
# dataset
if args.dataset == 'custom':
    train_dataset = CustomDataset(
        root='D:/DataSets/RGB2Depth/20200602_112100/'
    )  # KittiDataset(train=True)
    eval_dataset = CustomDataset(
        root='D:/DataSets/RGB2Depth/20200602_112100/'
    )  # KittiDataset(train=False)
    train_size = len(train_dataset)
    eval_size = len(eval_dataset)
    print(train_size, eval_size)
    train_batch_sampler = sampler(train_size, args.bs)
    eval_batch_sampler = sampler(eval_size, args.bs)
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.bs,
        shuffle=True,
    )  # snippet truncated here in the source
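# The sampler helper above is not shown in this snippet. One plausible
# reading (an assumption, not the project's code) is a random batch sampler
# over dataset indices:
from torch.utils.data import BatchSampler, RandomSampler

def sampler(dataset_size, batch_size):
    # yields lists of shuffled indices, batch_size at a time
    return BatchSampler(RandomSampler(range(dataset_size)),
                        batch_size=batch_size,
                        drop_last=False)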
def train():
    # input params
    set_seed(GLOBAL_SEED)
    config = getConfig()
    data_config = getDatasetConfig(config.dataset)
    sw_log = 'logs/%s' % config.dataset
    sw = SummaryWriter(log_dir=sw_log)
    best_prec1 = 0.
    rate = 0.875
    # define train_dataset and loader
    transform_train = transforms.Compose([
        transforms.Resize(
            (int(config.input_size // rate), int(config.input_size // rate))),
        transforms.RandomCrop((config.input_size, config.input_size)),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=32. / 255., saturation=0.5),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    train_dataset = CustomDataset(data_config['train'],
                                  data_config['train_root'],
                                  transform=transform_train)
    train_loader = DataLoader(train_dataset,
                              batch_size=config.batch_size,
                              shuffle=True,
                              num_workers=config.workers,
                              pin_memory=True,
                              worker_init_fn=_init_fn)
    transform_test = transforms.Compose([
        transforms.Resize((config.image_size, config.image_size)),
        transforms.CenterCrop(config.input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    val_dataset = CustomDataset(data_config['val'],
                                data_config['val_root'],
                                transform=transform_test)
    val_loader = DataLoader(val_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            num_workers=config.workers,
                            pin_memory=True,
                            worker_init_fn=_init_fn)
    # logging dataset info
    print('Dataset Name:{dataset_name}, Train:[{train_num}], Val:[{val_num}]'.
          format(dataset_name=config.dataset,
                 train_num=len(train_dataset),
                 val_num=len(val_dataset)))
    print('Batch Size:[{0}], Total::: Train Batches:[{1}], Val Batches:[{2}]'.
          format(config.batch_size, len(train_loader), len(val_loader)))
    # define model
    if config.model_name == 'inception':
        net = inception_v3_bap(pretrained=True,
                               aux_logits=False,
                               num_parts=config.parts)
    elif config.model_name == 'resnet50':
        net = resnet50(pretrained=True, use_bap=True)
    in_features = net.fc_new.in_features
    new_linear = torch.nn.Linear(in_features=in_features,
                                 out_features=train_dataset.num_classes)
    net.fc_new = new_linear
    # feature center
    feature_len = 768 if config.model_name == 'inception' else 512
    center_dict = {
        'center':
        torch.zeros(train_dataset.num_classes, feature_len * config.parts)
    }
    # gpu config
    use_gpu = torch.cuda.is_available() and config.use_gpu
    if use_gpu:
        net = net.cuda()
        center_dict['center'] = center_dict['center'].cuda()
        gpu_ids = [int(r) for r in config.gpu_ids.split(',')]
    if use_gpu and config.multi_gpu:
        net = torch.nn.DataParallel(net, device_ids=gpu_ids)
    # define optimizer
    assert config.optim in ['sgd', 'adam'], 'optim name not found!'
    if config.optim == 'sgd':
        optimizer = torch.optim.SGD(net.parameters(),
                                    lr=config.lr,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay)
    elif config.optim == 'adam':
        optimizer = torch.optim.Adam(net.parameters(),
                                     lr=config.lr,
                                     weight_decay=config.weight_decay)
    # define learning scheduler
    assert config.scheduler in ['plateau',
                                'step'], 'scheduler not supported!!!'
    if config.scheduler == 'plateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               'min',
                                                               patience=3,
                                                               factor=0.1)
    elif config.scheduler == 'step':
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                    step_size=2,
                                                    gamma=0.9)
    # define loss
    criterion = torch.nn.CrossEntropyLoss()
    if use_gpu:
        criterion = criterion.cuda()
    # train val parameters dict
    state = {
        'model': net,
        'train_loader': train_loader,
        'val_loader': val_loader,
        'criterion': criterion,
        'center': center_dict['center'],
        'config': config,
        'optimizer': optimizer
    }
    ## train and val
    engine = Engine()
    print(config)
    for e in range(config.epochs):
        if config.scheduler == 'step':
            scheduler.step()
        lr_val = get_lr(optimizer)
        print("Start epoch %d ==========, lr=%f" % (e, lr_val))
        train_prec, train_loss = engine.train(state, e)
        prec1, val_loss = engine.validate(state)
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': e + 1,
                'state_dict': net.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
                'center': center_dict['center']
            }, is_best, config.checkpoint_path)
        sw.add_scalars("Accuracy", {'train': train_prec, 'val': prec1}, e)
        sw.add_scalars("Loss", {'train': train_loss, 'val': val_loss}, e)
        if config.scheduler == 'plateau':
            scheduler.step(val_loss)
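# save_checkpoint is not shown in these snippets. A common implementation
# that matches how it is called above (an assumed sketch; the checkpoint file
# names and directory handling are guesses):
import os
import shutil
import torch

def save_checkpoint(state, is_best, output_dir):
    os.makedirs(output_dir, exist_ok=True)
    path = os.path.join(output_dir, 'checkpoint.pth.tar')
    torch.save(state, path)
    if is_best:
        # keep a separate copy of the best-scoring checkpoint
        shutil.copyfile(path, os.path.join(output_dir, 'model_best.pth.tar'))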
def test():
    engine = Engine()
    config = getConfig()
    data_config = getDatasetConfig(config.dataset)
    # define dataset
    print("At test")
    transform_test = transforms.Compose([
        transforms.Resize((config.image_size, config.image_size)),
        transforms.CenterCrop(config.input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    val_dataset = CustomDataset(data_config['val'],
                                data_config['val_root'],
                                transform=transform_test)
    val_loader = DataLoader(val_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            num_workers=config.workers,
                            pin_memory=True)
    # define model
    if config.model_name == 'inception':
        net = inception_v3_bap(pretrained=True, aux_logits=False)
    elif config.model_name == 'resnet50':
        net = resnet50(pretrained=True, use_bap=True)
    in_features = net.fc_new.in_features
    new_linear = torch.nn.Linear(in_features=in_features,
                                 out_features=val_dataset.num_classes)
    net.fc_new = new_linear
    # load checkpoint
    use_gpu = torch.cuda.is_available() and config.use_gpu
    if use_gpu:
        net = net.cuda()
        gpu_ids = [int(r) for r in config.gpu_ids.split(',')]
    # DataParallel wrapping is deliberately disabled here (the original guard
    # was hard-coded to False), so the checkpoint's plain keys load without a
    # 'module.' prefix:
    # if use_gpu and len(gpu_ids) > 1:
    #     net = torch.nn.DataParallel(net, device_ids=gpu_ids)
    # checkpoint_path = os.path.join(config.checkpoint_path, 'model_best.pth.tar')
    net.load_state_dict(torch.load(config.checkpoint_path)['state_dict'])
    # define loss
    criterion = torch.nn.CrossEntropyLoss()
    if use_gpu:
        criterion = criterion.cuda()
    prec1, prec5, pred_array, true_array = engine.test(
        val_loader, net, criterion)
    # (a commented-out block here optionally remapped the numeric labels to
    # class names via a pickled dict before building the confusion matrix)
    conf_mat = metrics.confusion_matrix(true_array.cpu().numpy(),
                                        pred_array.cpu().numpy())
    print("Prec1 : {}".format(prec1))
    print("conf_mat :\n {}".format(conf_mat))
    # plt.figure()
    # plot_confusion_matrix(conf_mat, [z for z in range(258)])
    # plt.show()
    np.savetxt("conf_mat.txt", conf_mat)
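# plot_confusion_matrix (referenced in the commented-out lines above) is not
# defined in this snippet; a minimal matplotlib sketch under that assumption:
import matplotlib.pyplot as plt
import numpy as np

def plot_confusion_matrix(cm, classes):
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.colorbar()
    plt.xticks(np.arange(len(classes)), classes, rotation=90)
    plt.yticks(np.arange(len(classes)), classes)
    plt.xlabel('Predicted label')
    plt.ylabel('True label')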
def get_dataloader(df, labels, cfg):
    dataset = CustomDataset(df, labels, cfg)
    loader = DataLoader(dataset, **cfg.loader)
    return loader
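# Example use of get_dataloader, assuming cfg.loader holds DataLoader keyword
# arguments to be unpacked (the exact keys below are illustrative; any valid
# DataLoader arguments work):
# cfg.loader = dict(batch_size=32, shuffle=True, num_workers=4, pin_memory=True)
# train_loader = get_dataloader(train_df, train_labels, cfg)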
set_seed(args.seed)

#%% model
model = models.ISICModel(n_class=args.n_class, arch=args.net)
model.init()
configs = model.configs
args.batch_size = configs.batch_size

#%% augmentation dataset and dataloader
train_transform = TrainAugmentation_albu(size=configs.image_size,
                                         mean=configs.image_mean,
                                         std=configs.image_std)
test_transform = TestAugmentation_albu(size=configs.image_size,
                                       mean=configs.image_mean,
                                       std=configs.image_std)
train_dataset = CustomDataset(root=args.datasets,
                              transform=train_transform,
                              label_name=configs.dict_label)
valid_dataset = CustomDataset(root=args.validation_dataset,
                              transform=test_transform,
                              label_name=configs.dict_label)
if args.imbalance_batchsampler == 1:
    train_loader = DataLoader(train_dataset,
                              batch_size=2 * args.batch_size,
                              sampler=ImbalancedDatasetSampler(train_dataset),
                              num_workers=args.num_workers)
else:
    train_loader = DataLoader(train_dataset,
                              batch_size=2 * args.batch_size,
                              num_workers=args.num_workers,
                              shuffle=True)
valid_loader = DataLoader(valid_dataset,
                          batch_size=2 * args.batch_size,
                          num_workers=args.num_workers,
                          shuffle=False)
def train():
    config_print()
    print("SEED : {}".format(GLOBAL_SEED))
    os.environ["CUDA_VISIBLE_DEVICES"] = config.gpu_ids
    set_seed(GLOBAL_SEED)
    best_prec1 = 0.
    write_log = 'logs/%s' % config.dataset_tag + config.gpu_ids
    write_val_log = 'logs/val%s' % config.dataset_tag + config.gpu_ids
    write = SummaryWriter(log_dir=write_log)
    write_val = SummaryWriter(log_dir=write_val_log)
    data_config = getDatasetConfig(config.dataset_tag)
    # load dataset
    train_dataset = CustomDataset(
        data_config['train'], data_config['train_root'],
        True)  # txt file, train root dir, is_training
    train_loader = DataLoader(train_dataset,
                              batch_size=config.batch_size,
                              shuffle=True,
                              num_workers=config.workers,
                              pin_memory=True,
                              worker_init_fn=_init_fn)
    val_dataset = CustomDataset(data_config['val'], data_config['val_root'],
                                False)
    val_loader = DataLoader(val_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            num_workers=config.workers,
                            pin_memory=True)  # , worker_init_fn=_init_fn
    print('Dataset Name:{dataset_name}, Train:[{train_num}], Val:[{val_num}]'.
          format(dataset_name=config.dataset_tag,
                 train_num=len(train_dataset),
                 val_num=len(val_dataset)))
    # define model
    net = init_model(pretrained=True,
                     model_name=config.model_name,
                     class_num=config.class_num)
    # gpu config
    use_gpu = torch.cuda.is_available() and config.use_gpu
    if use_gpu:
        net = net.cuda()
        gpu_ids = [int(r) for r in config.gpu_ids.split(',')]
    if use_gpu and config.multi_gpu:
        net = torch.nn.DataParallel(net, device_ids=gpu_ids)
    # define optimizer
    assert config.optimizer in ['sgd', 'adam'], 'optim name not found!'
    if config.optimizer == 'sgd':
        optimizer = torch.optim.SGD(net.parameters(),
                                    lr=config.learning_rate,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay)
    elif config.optimizer == 'adam':
        optimizer = torch.optim.Adam(net.parameters(),
                                     lr=config.learning_rate,
                                     weight_decay=config.weight_decay)
    # define learning scheduler
    assert config.scheduler in ['plateau', 'step', 'muilt_step',
                                'cosine'], 'scheduler not supported!!!'
    if config.scheduler == 'plateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               'min',
                                                               patience=3,
                                                               factor=0.1)
    elif config.scheduler == 'step':
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                    step_size=2,
                                                    gamma=0.9)
    elif config.scheduler == 'muilt_step':
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[30, 100], gamma=0.1)
    elif config.scheduler == 'cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=config.epochs)
    # define loss
    criterion = torch.nn.CrossEntropyLoss()
    if use_gpu:
        criterion = criterion.cuda()
    # train val parameters dict
    state = {
        'model': net,
        'train_loader': train_loader,
        'val_loader': val_loader,
        'criterion': criterion,
        'config': config,
        'optimizer': optimizer,
        'write': write,
        'write_val': write_val
    }
    # define resume
    start_epoch = 0
    if config.resume:
        ckpt = torch.load(config.resume)
        net.load_state_dict(ckpt['state_dict'])
        start_epoch = ckpt['epoch']
        best_prec1 = ckpt['best_prec1']
        optimizer.load_state_dict(ckpt['optimizer'])
    # train and val
    engine = Engine()
    for e in range(start_epoch, config.epochs + 1):
        if config.scheduler in ['step', 'muilt_step']:
            scheduler.step()
        lr_train = get_lr(optimizer)
        print("Start epoch %d ==========, lr=%f" % (e, lr_train))
        train_prec, train_loss = engine.train(state, e)
        prec1, val_loss = engine.validate(state, e)
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': e + 1,
                'state_dict': net.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict()
            }, is_best, config.checkpoint_path)
        write.add_scalars("Accuracy", {'train': train_prec, 'val': prec1}, e)
        write.add_scalars("Loss", {'train': train_loss, 'val': val_loss}, e)
        if config.scheduler == 'plateau':
            scheduler.step(val_loss)
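# set_seed and _init_fn are used throughout these snippets but not defined in
# them. Minimal sketches under common-practice assumptions (the projects'
# actual helpers may differ):
import random
import numpy as np
import torch

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

def _init_fn(worker_id):
    # re-seed each DataLoader worker so augmentations stay reproducible
    np.random.seed(GLOBAL_SEED + worker_id)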