def __init__(self, data, labels, is_train=True):
    """Hold CIFAR-10 arrays and build the augmentation pipelines.

    Train mode prepares a weak pipeline (pad-crop + flip) and a strong
    pipeline (weak + RandomAugment); eval mode uses a single
    resize/normalize pipeline.
    """
    super(Cifar10, self).__init__()
    self.data, self.labels = data, labels
    self.is_train = is_train
    assert len(self.data) == len(self.labels)
    # CIFAR-10 per-channel statistics (inputs presumably in [0, 1] — see std choice).
    mean, std = (0.4914, 0.4822, 0.4465), (0.2471, 0.2435, 0.2616)

    def geometric():
        # Fresh transform instances per pipeline so no state is shared.
        return [T.Resize((32, 32)),
                T.PadandRandomCrop(border=4, cropsize=(32, 32)),
                T.RandomHorizontalFlip(p=0.5)]

    def finish():
        return [T.Normalize(mean, std), T.ToTensor()]

    if is_train:
        self.trans_weak = T.Compose(geometric() + finish())
        self.trans_strong = T.Compose(
            geometric() + [RandomAugment(2, 10)] + finish())
    else:
        self.trans = T.Compose([T.Resize((32, 32))] + finish())
def __init__(self, root, mode='train'):
    """Index an RGB-T dataset rooted at *root*.

    Each sample is [rgb_path, thermal_path, mask_path]; the file stems
    come from the listing of the GT directory.
    """
    gt_dir = os.path.join(root, 'GT')
    self.samples = [
        [os.path.join(root, 'RGB', name[:-4] + '.jpg'),
         os.path.join(root, 'T', name[:-4] + '.jpg'),
         os.path.join(root, 'GT', name)]
        for name in os.listdir(gt_dir)
    ]
    # Both modes share the normalize/resize steps; train adds a random flip.
    stats = dict(mean1=mean_rgb, mean2=mean_t, std1=std_rgb, std2=std_t)
    if mode == 'train':
        self.transform = transform.Compose(
            transform.Normalize(**stats),
            transform.Resize(400, 400),
            transform.RandomHorizontalFlip(),
            transform.ToTensor())
    elif mode == 'test':
        self.transform = transform.Compose(
            transform.Normalize(**stats),
            transform.Resize(400, 400),
            transform.ToTensor())
    else:
        raise ValueError
def __init__(self, cfg):
    """Build the preprocessing pipeline selected by ``cfg.mode``.

    Raises ValueError for any mode other than train/test/val.
    """
    self.cfg = cfg
    mode = self.cfg.mode
    if mode == 'train':
        steps = (transform.Normalize(mean=cfg.mean, std=cfg.std),
                 transform.Resize(size=448),
                 transform.RandomHorizontalFlip(),
                 transform.ToTensor())
    elif mode in ('test', 'val'):
        # Evaluation keeps the native size: normalize + tensorize only.
        steps = (transform.Normalize(mean=cfg.mean, std=cfg.std),
                 transform.ToTensor())
    else:
        raise ValueError
    self.transform = transform.Compose(*steps)
def getLoader(datasetName, dataroot, originalSize, imageSize, batchSize=64,
              workers=4, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
              split='train', shuffle=True, seed=None):
    """Build a DataLoader over *dataroot* and return (dataloader, image list).

    Args:
        datasetName: 'folder' or 'list' — selects the dataset class.
        dataroot: dataset root directory.
        originalSize: size passed to ``transforms.Scale``.
        imageSize: kept for interface compatibility (crop steps are
            disabled below, so it is currently unused).
        split: kept for interface compatibility; train and eval pipelines
            are identical here because the random crop/flip steps are
            commented out.

    Returns:
        (torch.utils.data.DataLoader, dataset.imgs)

    Raises:
        ValueError: for an unknown *datasetName*.  Previously this fell
        through and crashed later with a confusing NameError on
        ``commonDataset``.
    """
    if datasetName == 'folder':
        from pix2pix2 import folder_acquire as commonDataset
        import transform as transforms
    elif datasetName == 'list':
        from pix2pix2 import list_acquire as commonDataset
        import transform as transforms
    else:
        raise ValueError(
            "unknown datasetName: %r (expected 'folder' or 'list')"
            % (datasetName,))

    # Train/eval used to duplicate this construction with only commented-out
    # augmentation steps differing; the effective pipelines were identical.
    # transforms.RandomCrop(imageSize) / RandomHorizontalFlip (train) and
    # transforms.CenterCrop(imageSize) (eval) remain disabled.
    dataset = commonDataset(
        root=dataroot,
        transform=transforms.Compose([
            transforms.Scale(originalSize),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ]),
        seed=seed)
    assert dataset
    ims = dataset.imgs

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batchSize,
                                             shuffle=shuffle,
                                             num_workers=int(workers))
    return dataloader, ims
def __init__(self, cfg):
    """Collect (image, scribble) path pairs listed in <datapath>/<mode>.txt."""
    self.samples = []
    listfile = cfg.datapath + '/' + cfg.mode + '.txt'
    with open(listfile, 'r') as lines:
        for line in lines:
            stem = line.strip()
            self.samples.append([
                cfg.datapath + '/image/' + stem + '.jpg',
                cfg.datapath + '/scribble/' + stem + '.png',
            ])
    if cfg.mode == 'train':
        # Random flip + crop back to 320 for augmentation.
        self.transform = transform.Compose(
            transform.Normalize(mean=cfg.mean, std=cfg.std),
            transform.Resize(320, 320),
            transform.RandomHorizontalFlip(),
            transform.RandomCrop(320, 320),
            transform.ToTensor())
    elif cfg.mode == 'test':
        self.transform = transform.Compose(
            transform.Normalize(mean=cfg.mean, std=cfg.std),
            transform.Resize(320, 320),
            transform.ToTensor())
    else:
        raise ValueError
def img_transforms(img):
    """Convert *img* to float32, normalize with the module-level mean/std,
    and tensorize it; only the 'image' entry of the sample dict is returned."""
    pipeline = transforms.Compose([
        # tr.FixedResize(img_size),
        tr.Normalize(mean=mean, std=std),
        tr.ToTensor()
    ])
    as_float = np.array(img).astype(np.float32)
    # The tr.* transforms operate on a sample dict, not a bare array.
    out = pipeline({'image': as_float})
    return out['image']
def __init__(self, cfg):
    """Collect (image, mask) path pairs from <datapath>/<mode>.txt."""
    self.samples = []
    with open(os.path.join(cfg.datapath, cfg.mode + '.txt'), 'r') as lines:
        for line in lines:
            stem = line.strip()
            self.samples.append([
                os.path.join(cfg.datapath, 'image', stem + '.jpg'),
                os.path.join(cfg.datapath, 'mask', stem + '.png'),
            ])
    if cfg.mode == 'train':
        # Resize to 320 then random-crop to 288 (plus flip) for augmentation.
        self.transform = transform.Compose(
            transform.Normalize(mean=cfg.mean, std=cfg.std),
            transform.Resize(320, 320),
            transform.RandomHorizontalFlip(),
            transform.RandomCrop(288, 288),
            transform.ToTensor())
    elif cfg.mode == 'test':
        self.transform = transform.Compose(
            transform.Normalize(mean=cfg.mean, std=cfg.std),
            transform.Resize(320, 320),
            transform.ToTensor())
    else:
        raise ValueError
def __init__(self, data, labels, is_train=True):
    """CIFAR-10 wrapper with three train pipelines (reg / weak / strong)
    and one eval pipeline."""
    super(Cifar10, self).__init__()
    self.data, self.labels = data, labels
    self.is_train = is_train
    assert len(self.data) == len(self.labels)
    mean, std = (0.4914, 0.4822, 0.4465), (0.2471, 0.2435, 0.2616)
    # mean, std = (-0.0172, -0.0356, -0.1069), (0.4940, 0.4869, 0.5231)  # [-1, 1]
    if not is_train:
        self.trans = T.Compose([
            T.Resize((32, 32)),
            T.Normalize(mean, std),
            T.ToTensor(),
        ])
        return
    # Color-jitter/grayscale branch built on torchvision transforms.
    self.trans_reg = transforms.Compose([
        transforms.RandomResizedCrop(32),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomApply(
            [transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8),
        transforms.RandomGrayscale(p=0.2),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])

    def geometric():
        # Fresh instances per pipeline so no state is shared.
        return [T.Resize((32, 32)),
                T.PadandRandomCrop(border=4, cropsize=(32, 32)),
                T.RandomHorizontalFlip(p=0.5)]

    self.trans_weak = T.Compose(
        geometric() + [T.Normalize(mean, std), T.ToTensor()])
    self.trans_strong = T.Compose(
        geometric() + [RandomAugment(2, 10),
                       T.Normalize(mean, std), T.ToTensor()])
def __init__(self, root, mode='train'):
    """Index an RGB-D dataset laid out as <mode>_images / <mode>_depth /
    <mode>_masks; samples are [rgb, depth, mask] path triples."""
    self.mode = mode
    img_dir = os.path.join(root, mode + '_images')
    self.samples = []
    for name in os.listdir(img_dir):
        stem = name[:-4]
        self.samples.append([
            os.path.join(img_dir, name),
            os.path.join(root, mode + '_depth', stem + '.png'),
            os.path.join(root, mode + '_masks', stem + '.png'),
        ])
    if mode == 'train':
        self.transform = transform.Compose(
            transform.Normalize(mean1=mean_rgb, std1=std_rgb),
            transform.Resize(256, 256),
            transform.RandomHorizontalFlip(),
            transform.ToTensor())
    elif mode == 'test':
        self.transform = transform.Compose(
            transform.Normalize(mean1=mean_rgb, std1=std_rgb),
            transform.Resize(256, 256),
            transform.ToTensor())
    else:
        raise ValueError
def __init__(self, data, labels, n_guesses=1, is_train=True):
    """CIFAR-10 wrapper; train and eval pipelines differ only by the
    pad-and-crop / flip augmentation steps."""
    super(Cifar10, self).__init__()
    self.data, self.labels = data, labels
    self.n_guesses = n_guesses
    assert len(self.data) == len(self.labels)
    assert self.n_guesses >= 1
    # mean, std = (0.4914, 0.4822, 0.4465), (0.2471, 0.2435, 0.2616)  # [0, 1]
    # Channel statistics for inputs already scaled to [-1, 1].
    mean = (-0.0172, -0.0356, -0.1069)
    std = (0.4940, 0.4869, 0.5231)
    steps = [T.Resize((32, 32))]
    if is_train:
        steps += [T.PadandRandomCrop(border=4, cropsize=(32, 32)),
                  T.RandomHorizontalFlip(p=0.5)]
    steps += [T.Normalize(mean, std), T.ToTensor()]
    self.trans = T.Compose(steps)
def main(config):
    """Dispatch to training or testing for the saliency model, sharing one
    deterministic preprocessing pipeline between both modes."""
    shared_trans = transforms.Compose([
        transform.FixedResize(size=(config.input_size, config.input_size)),
        transform.Normalize(mean=(0.485, 0.456, 0.406),
                            std=(0.229, 0.224, 0.225)),
        transform.ToTensor()
    ])
    if config.mode == 'train':
        dataset = Dataset(datasets=['DAVIS'], transform=shared_trans,
                          mode='train')
        train_loader = data.DataLoader(dataset,
                                       batch_size=config.batch_size,
                                       num_workers=config.num_thread,
                                       drop_last=True,
                                       shuffle=True)
        # Checkpoints go under <save_fold>/models; create it on first run.
        model_dir = "%s/%s" % (config.save_fold, 'models')
        if not os.path.exists(model_dir):
            os.mkdir(model_dir)
        config.save_fold = model_dir
        Solver(train_loader, None, config).train()
    elif config.mode == 'test':
        dataset = Dataset(datasets=config.test_dataset,
                          transform=shared_trans, mode='test')
        test_loader = data.DataLoader(dataset,
                                      batch_size=config.test_batch_size,
                                      num_workers=config.num_thread,
                                      drop_last=True,
                                      shuffle=False)
        Solver(train_loader=None, test_loader=test_loader, config=config,
               save_fold=config.testsavefold).test()
    else:
        raise IOError("illegal input!!!")
samples, labels = self.get_items(current_pos, current_pos + batch_size) if self.transform: samples = self.transform(samples) yield samples, labels if __name__ == '__main__': # just for debug the dataset tic = time.time() background = np.ones(18, ) background = -9999 * background counts = 0 mean = unpickle('mean_channal.pkl') tfs = T.Compose([ T.Normalize(mean=mean), T.RandomHorizontalFlip(0.5), ]) datasets = CustomDataset('training.h5', transform=None) for samples, labels in datasets.load_data(batch_size=256, shuffle=False): # background += np.sum(samples, axis = (0,1,2)) counts += labels.shape[0] print("counts:", counts) # compute mean,std by channel print("len:", len(datasets)) # mean = background / len(datasets) # print("mean:", mean.shape) toc = time.time() print("elasped time is %.3f" % (toc - tic)) # pdb.set_trace() # all_data = np.concatenate([datasets.s1,datasets.s2], axis=3)
save_best_only=False, save_weights_only=False, mode='auto', period=1) #model = resnet.ResnetBuilder.build_resnet_18((18, 32, 32), nb_classes) network = model.create_model('resnet50', input_shape=(18, 32, 32), num_outputs=nb_classes) network.compile(loss='sparse_categorical_crossentropy', optimizer=optimizers.SGD(lr=0.1, momentum=0.9, nesterov=True), metrics=['accuracy']) mean = unpickle("mean_channal.pkl") trfs = T.Compose([T.Normalize(mean), T.RandomHorizontalFlip(0.5)]) training_data = CustomDataset('/mnt/img1/yangqh/Germany_cloud/training.h5', transform=None) validation_data = CustomDataset('/mnt/img1/yangqh/Germany_cloud/training.h5', transform=None) ##############此处改成train set 的目录 network.fit_generator( training_data.load_data(batch_size=batch_size), steps_per_epoch=len(training_data) // batch_size, validation_data=validation_data.load_data(batch_size=batch_size), validation_steps=len(validation_data) // batch_size, epochs=nb_epoch, verbose=1, max_q_size=100, callbacks=[checkpoint, lr_reducer, early_stopper, csv_logger]) network.save('final.h5')
def __init__(self, cfg):
    """Build the sample list and preprocessing pipeline for RGB-D saliency.

    Train mode reads the NLPR + NJUD splits and attaches a per-image
    'f_beta' score from the pickled score files; test mode reads
    <datapath>/test.txt.  Each sample is [key_or_name, rgb, depth, mask].
    """
    # NJUD: depth:*.jpg, gt:*.png, rgb:*.jpg
    # NLPR: depth:*.jpg, gt:*.jpg, rgb:*.jpg
    self.samples = []
    self.mode = cfg.mode
    if cfg.mode == "train":
        # Pickled dicts keyed by image stem; only the 'f_beta' entry is read here.
        with open(osp.join(cfg.datapath, "NLPR_score.pkl"), "rb") as fin:
            nlpr_data = pickle.load(fin)
        with open(osp.join(cfg.datapath, "NJUD_score.pkl"), "rb") as fin:
            njud_data = pickle.load(fin)
        with open(osp.join(cfg.datapath, "NLPR", cfg.mode + '.txt'),
                  'r') as lines:
            for line in lines:
                line = line.strip()
                image_name = osp.join(cfg.datapath, "NLPR/rgb", line + ".jpg")
                depth_name = osp.join(cfg.datapath, "NLPR/depth",
                                      line + ".jpg")
                ostu_rgb_name = osp.join(cfg.datapath, "NLPR/ostu_rgb",
                                         line + ".jpg")
                mask_name = osp.join(cfg.datapath, "NLPR/gt", line + ".jpg")
                #self.samples.append([image_name, ostu_rgb_name, mask_name])
                key = nlpr_data[line]['f_beta']
                self.samples.append(
                    [key, image_name, depth_name, mask_name])
        with open(osp.join(cfg.datapath, "NJUD", cfg.mode + '.txt'),
                  'r') as lines:
            for line in lines:
                line = line.strip()
                image_name = osp.join(cfg.datapath, "NJUD/rgb", line + ".jpg")
                depth_name = osp.join(cfg.datapath, "NJUD/depth",
                                      line + ".jpg")
                ostu_rgb_name = osp.join(cfg.datapath, "NJUD/ostu_rgb",
                                         line + ".jpg")
                # NJUD ground truth is .png (NLPR uses .jpg above).
                mask_name = osp.join(cfg.datapath, "NJUD/gt", line + ".png")
                #self.samples.append([image_name, ostu_rgb_name, mask_name])
                key = njud_data[line]['f_beta']
                self.samples.append(
                    [key, image_name, depth_name, mask_name])
        """
        with open(osp.join(cfg.datapath, "train.txt"), "r") as fin:
            for line in fin:
                line = line.strip()
                image_name = osp.join(cfg.datapath, "input_train", line+".jpg")
                depth_name = osp.join(cfg.datapath, "depth_train", line+".png")
                mask_name = osp.join(cfg.datapath, "gt_train", line+".png")
                self.samples.append([image_name, depth_name, mask_name])
        """
        print("train mode: len(samples):%s" % (len(self.samples)))
    else:
        #LFSD,NJUD,NLPR,STEREO797
        #image, depth: *.jpg, mask:*.png
        def read_test(name):
            # NOTE(review): *name* is unused here; all paths come from
            # cfg.datapath — confirm whether per-dataset dispatch was intended.
            samples = []
            with open(osp.join(cfg.datapath, "test.txt"), "r") as lines:
                for line in lines:
                    line = line.strip()
                    image_name = osp.join(cfg.datapath, "image", line + ".jpg")
                    depth_name = osp.join(cfg.datapath, "depth", line + ".jpg")
                    ostu_rgb_name = osp.join(cfg.datapath, "ostu_rgb",
                                             line + ".jpg")
                    mask_name = osp.join(cfg.datapath, "mask", line + ".png")
                    samples.append(
                        [line, image_name, depth_name, mask_name])
            return samples
        # Dataset name = last component of the datapath.
        db_name = cfg.datapath.rstrip().split("/")[-1]
        self.samples = read_test(db_name)
        print("test mode name:%s, len(samples):%s" %
              (db_name, len(self.samples)))
    if cfg.mode == 'train':
        if cfg.train_scales is None:
            cfg.train_scales = [224, 256, 320]
        print("Train_scales:", cfg.train_scales)
        # Multi-scale training: each Multi* transform handles a list of scales.
        self.transform = transform.Compose(
            transform.MultiResize(cfg.train_scales),
            transform.MultiRandomHorizontalFlip(),
            transform.MultiNormalize(),
            transform.MultiToTensor())
    elif cfg.mode == 'test':
        self.transform = transform.Compose(
            transform.Resize((256, 256)),
            transform.Normalize(mean=cfg.mean, std=cfg.std,
                                d_mean=cfg.d_mean, d_std=cfg.d_std),
            transform.ToTensor(depth_gray=True))
    else:
        raise ValueError
# Distributed setup: distributed mode kicks in when more than one GPU is present.
args.n_gpu = n_gpu
args.distributed = n_gpu > 1
if args.distributed:
    torch.cuda.set_device(args.local_rank)
    # NOTE(review): 'gloo' backend with CUDA — 'nccl' is the usual choice
    # for GPU training; confirm this is intentional.
    torch.distributed.init_process_group(backend='gloo',
                                         init_method='env://')
    synchronize()
device = 'cuda'
# Training transform: random resize within a range + flip; validation: fixed resize.
train_trans = transform.Compose([
    transform.RandomResize(args.train_min_size_range, args.train_max_size),
    transform.RandomHorizontalFlip(0.5),
    transform.ToTensor(),
    transform.Normalize(args.pixel_mean, args.pixel_std)
])
valid_trans = transform.Compose([
    transform.Resize(args.test_min_size, args.test_max_size),
    transform.ToTensor(),
    transform.Normalize(args.pixel_mean, args.pixel_std)
])
train_set = COCODataset(args.path, 'train', train_trans)
valid_set = COCODataset(args.path, 'val', valid_trans)
# Alternative backbones kept for reference:
# backbone = vovnet39(pretrained=True)
# backbone = vovnet57(pretrained=True)
# backbone = resnet18(pretrained=True)
backbone = resnet50(pretrained=True)
# Arithmetic / photometric augmenters.
mul = transform.Mul()
Solarize = transform.Solarize()
JpegCompression = transform.JpegCompression()
#blend
BlendAlpha = transform.BlendAlpha()
BlendAlphaElementwise = transform.BlendAlphaElementwise()
#blur
GaussianBlur = transform.GaussianBlur()
AverageBlur = transform.AverageBlur()
MedianBlur = transform.MedianBlur()
MotionBlur = transform.MotionBlur()
flip = transform.Flip()
flip_1 = transform.Flip(p=1)  # always-on flip (p=1)
normalize = transform.Normalize()
Pad_And_Shift = transform.Pad_And_Shift(max_size=512)
# Training pipeline.  NOTE(review): the nested lists look like groups from
# which one augmenter is sampled at random — confirm against the pipeline
# runner's semantics.
train_pipeline = [
    resize,
    [[CoarseDropout, CoarseSaltAndPepper, Cutout],
     [AdditiveGaussianNoise, AdditiveLaplaceNoise, AdditivePoissonNoise],
     [HueSat, DropChannel],
     [elastic, Rotate_Shear],
     [add, mul, JpegCompression]],
    flip, normalize, Pad_And_Shift
]
# Multi-scale test-time pipelines (224 / 320 / 416 / 512).
test_pipeline = [[Resize([224]), normalize, Pad_And_Shift],
                 [Resize([320]), normalize, Pad_And_Shift],
                 [Resize([416]), normalize, Pad_And_Shift],
                 [Resize([512]), normalize, Pad_And_Shift]]
def main():
    """Train an FPN-UNet on VOC-style segmentation data, resuming from the
    newest checkpoint in *model_dir* when one exists."""
    print("------------------------------")
    print("START")
    print("------------------------------")
    # Augmentation: flip + scale/rotate jitter, fixed resize, normalize.
    composed_transforms_tr = standard_transforms.Compose([
        tr.RandomHorizontalFlip(),
        tr.ScaleNRotate(rots=(-15, 15), scales=(.75, 1.5)),
        # tr.RandomResizedCrop(img_size),
        tr.FixedResize(img_size),
        tr.Normalize(mean=mean, std=std),
        tr.ToTensor()
    ])  # data pocessing and data augumentation
    voc_train_dataset = VOCSegmentation(
        base_dir=data_dir, split='train',
        transform=composed_transforms_tr)  # get data
    #return {'image': _img, 'gt': _target}
    print("Data loaded...")
    print("Dataset:{}".format(dataset))
    print("------------------------------")
    voc_train_loader = DataLoader(voc_train_dataset, batch_size=batch_size,
                                  shuffle=True, num_workers=1)
    # Pull one batch up front to report the tensor shapes.
    iter_dataset = iter(voc_train_loader)
    train = next(iter_dataset)
    print("Input size {}".format(train['image'].shape))
    print("Output size {}".format(train['gt'].shape))
    print("Model start training...")
    print("------------------------------")
    print("Model info:")
    print("If use CUDA : {}".format(use_gpu))
    print('Initial learning rate {} | batch size {} | epoch num {}'.format(
        0.0001, batch_size, epoches))
    print("------------------------------")
    model = fpn_unet(input_bands=input_bands, n_classes=num_class)
    model_id = 0
    # Resume from the newest checkpoint, if any; the numeric part of its
    # filename becomes the starting epoch offset.
    if find_new_file(model_dir) is not None:  # load model
        model.load_state_dict(torch.load(find_new_file(model_dir)))
        # model.load_state_dict(torch.load('./pth/best2.pth'))
        print('load the model %s' % find_new_file(model_dir))
        model_id = re.findall(r'\d+', find_new_file(model_dir))
        model_id = int(model_id[0])
    print('Current model ID {}'.format(model_id))
    model.cuda()
    criterion = torch.nn.CrossEntropyLoss()  #define loss
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=0.0001)  #define optimizer
    model.cuda()
    model.train()
    f = open('log.txt', 'w')
    for epoch in range(epoches):
        cur_log = ''
        running_loss = 0.0
        start = time.time()
        lr = adjust_learning_rate(base_lr, optimizer, epoch, model_id, power)
        print("Current learning rate : {}".format(lr))
        for i, batch_data in tqdm.tqdm(enumerate(voc_train_loader)):  #get data
            images, labels = batch_data['image'], batch_data['gt']
            labels = labels.view(images.size()[0], img_size, img_size).long()
            # i doubles as a running sample counter used for loss averaging.
            i += images.size()[0]
            images = Variable(images).cuda()
            labels = Variable(labels).cuda()
            optimizer.zero_grad()
            outputs = model(images)
            losses = criterion(outputs, labels)  # calculate loss
            losses.backward()
            optimizer.step()
            running_loss += losses
        print("Epoch [%d] all Loss: %.4f" %
              (epoch + 1 + model_id, running_loss / i))
        cur_log += 'epoch:{}, '.format(str(epoch)) + 'learning_rate:{}'.format(
            str(lr)) + ', ' + 'train_loss:{}'.format(
                str(running_loss.item() / i)) + ', '
        torch.save(model.state_dict(),
                   os.path.join(model_dir, '%d.pth' % (model_id + epoch + 1)))
        print("Model Saved")
        # iou, acc, recall, precision = test_my(input_bands, model_name, model_dir, img_size, num_class)
        # cur_log += 'iou:{}'.format(str(iou)) + ', ' + 'acc:{}'.format(str(acc))+'\n' + ', ' + 'recall:{}'.format(str(recall))+'\n' + ', ' + 'precision:{}'.format(str(precision))
        end = time.time()
        time_cha = end - start
        left_steps = epoches - epoch - model_id
        print('the left time is %d hours, and %d minutes' %
              (int(left_steps * time_cha) / 3600,
               (int(left_steps * time_cha) % 3600) / 60))
        print(cur_log)
        f.writelines(str(cur_log))
init_method='env://') synchronize() img_mean = [0.485, 0.456, 0.406] img_std = [0.229, 0.224, 0.225] device = 'cuda' # torch.backends.cudnn.deterministic = True train_trans = transform.Compose([ transform.RandomScale(0.5, 2.0), # transform.Resize(args.size, None), transform.RandomHorizontalFlip(), transform.RandomCrop(args.size), transform.RandomBrightness(0.04), transform.ToTensor(), transform.Normalize(img_mean, img_std), transform.Pad(args.size) ]) valid_trans = transform.Compose( [transform.ToTensor(), transform.Normalize(img_mean, img_std)]) train_set = ADE20K(args.path, 'train', train_trans) valid_set = ADE20K(args.path, 'valid', valid_trans) arch_map = {'vovnet39': vovnet39, 'vovnet57': vovnet57} backbone = arch_map[args.arch](pretrained=True) model = OCR(args.n_class + 1, backbone).to(device) if args.distributed:
import sys
import time
import torchaudio
import pandas as pd
import xml.etree.ElementTree as ET
import ranking
import transform as T
import numpy as np
import torchvision
import torch

# Report runtime versions (video transform behavior varies across releases).
print(torch.__version__)
print(torchvision.__version__)
# Per-channel normalization constants for video frames; these match the
# defaults used with torchvision's pretrained video models — confirm.
normalize = T.Normalize(mean=[0.43216, 0.394666, 0.37645],
                        std=[0.22803, 0.22145, 0.216989])
# def normalize(tensor):
#     # Subtract the mean, and scale to the interval [-1,1]
#     tensor_minusmean = tensor - tensor.mean()
#     return tensor_minusmean/tensor_minusmean.abs().max()
# Clip pipeline: uint8 -> float in [0,1], resize, flip, normalize, crop to 112x112.
transform_video = torchvision.transforms.Compose([
    T.ToFloatTensorInZeroOne(),
    T.Resize((128, 171)),
    T.RandomHorizontalFlip(),
    normalize,
    T.RandomCrop((112, 112))
])
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Movie metadata for the LIRIS-ACCEDE affective dataset.
root = ET.parse(
    '/root/yangsen-data/LIRIS-ACCEDE-movies/ACCEDEmovies.xml').getroot()
movie_length = {}
synchronize()
# Standard ImageNet channel statistics.
img_mean = [0.485, 0.456, 0.406]
img_std = [0.229, 0.224, 0.225]
device = 'cuda'
# torch.backends.cudnn.deterministic = True
# Training: scale jitter, flip, crop, brightness jitter, then pad to args.size.
train_trans = transform.Compose(
    [
        transform.RandomScale(0.5, 2.0),
        # transform.Resize(args.size, None),
        transform.RandomHorizontalFlip(),
        transform.RandomCrop(args.size),
        transform.RandomBrightness(0.04),
        transform.ToTensor(),
        transform.Normalize(img_mean, img_std),
        transform.Pad(args.size)
    ]
)
# Validation: tensorize + normalize only (native resolution).
valid_trans = transform.Compose(
    [transform.ToTensor(), transform.Normalize(img_mean, img_std)]
)
train_set = ADE20K(args.path, 'train', train_trans)
valid_set = ADE20K(args.path, 'valid', valid_trans)
# Backbone selected by CLI argument.
arch_map = {'vovnet39': vovnet39, 'vovnet57': vovnet57}
backbone = arch_map[args.arch](pretrained=True)
# n_class + 1 reserves an extra channel (presumably background/ignore — confirm).
model = OCR(args.n_class + 1, backbone).to(device)
def main():
    """Train (or, with *evaluate* set, only test) ReIDNet on Market-1501."""
    # Load the dataset index.
    dataset = Market1501()
    # Training-time preprocessing.
    transform_train = T.Compose([
        T.Random2DTransform(height, width),  # unify scale, random crop
        T.RandomHorizontalFlip(),  # horizontal flip
        T.ToTensor(),  # image -> tensor
        T.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),  # normalization, fixed parameters
    ])
    # Test-time preprocessing (deterministic).
    transform_test = T.Compose([
        T.Resize((height, width)),  # unify scale
        T.ToTensor(),  # image -> tensor
        T.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),  # normalization, fixed parameters
    ])
    # Training data loader.
    train_data_loader = DataLoader(
        ImageDataset(dataset.train, transform=transform_train),  # custom dataset with the training transform
        batch_size=train_batch_size,  # samples per batch
        drop_last=True,  # drop the final incomplete batch
    )
    print("train_data_loader inited")
    # Query data loader.
    query_data_loader = DataLoader(
        ImageDataset(dataset.query, transform=transform_test),  # custom dataset with the test transform
        batch_size=test_batch_size,  # samples per batch
        shuffle=False,  # keep order for evaluation
        drop_last=True,  # drop the final incomplete batch
    )
    print("query_data_loader inited")
    # Gallery data loader.
    gallery_data_loader = DataLoader(
        ImageDataset(dataset.gallery, transform=transform_test),  # custom dataset with the test transform
        batch_size=test_batch_size,  # samples per batch
        shuffle=False,  # keep order for evaluation
        drop_last=True,  # drop the final incomplete batch
    )
    print("gallery_data_loader inited\n")
    # Build the model; class count and loss choice decide what it outputs.
    model = ReIDNet(num_classes=751, loss={'softmax'})
    print("=>ReIDNet loaded")
    print("Model size: {:.5f}M\n".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))
    # Classification loss.
    criterion_class = nn.CrossEntropyLoss()
    # Optimizer: (1) parameters to optimize, (2) learning rate, (3) weight decay.
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=train_lr,
                                weight_decay=5e-04)
    # Dynamic LR: in 'min' mode the LR shrinks by *factor* after *patience*
    # epochs without improvement of the monitored quantity, down to min_lr.
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               mode='min',
                                               factor=dy_step_gamma,
                                               patience=10,
                                               min_lr=0.0001)
    # Evaluation-only run.
    if evaluate:
        test(model, query_data_loader, gallery_data_loader)
        return 0
    # Training run.
    print('————model start training————\n')
    bt = time.time()  # training start time
    for epoch in range(start_epoch, end_epoch):
        model.train(True)
        train(epoch, model, criterion_class, optimizer, scheduler,
              train_data_loader)
    et = time.time()  # training end time
    print('**模型训练结束, 保存最终参数到{}**\n'.format(final_model_path))
    torch.save(model.state_dict(), final_model_path)
    print('————训练总用时{:.2f}小时————'.format((et - bt) / 3600.0))
return 0 if __name__ == '__main__': # 使用局部对齐模型 model = ReIDNet(num_classes=751, loss={'softmax, metric'}, aligned=True) # 加载局部对齐模型最优参数 model.load_state_dict( torch.load('./model/param/aligned_trihard_net_params_best.pth')) # 指定数据集 dataset = Market1501() # query数据与gallery数据处理器 transform = T.Compose([ T.Resize((height, width)), # 尺度统一 T.ToTensor(), # 图片转张量 T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), # 归一化,参数固定 ]) # query集吞吐器 query_data_loader = DataLoader( ImageDataset(dataset.query, transform=transform), # 自定义的数据集,指定使用数据处理器 batch_size=batch_size, # 一个批次的大小(一个批次有多少个图片张量) drop_last=True, # 丢弃最后无法称为一整个批次的数据 ) # gallery集吞吐器 gallery_data_loader = DataLoader( ImageDataset(dataset.gallery, transform=transform), # 自定义的数据集,指定使用数据处理器 batch_size=batch_size, # 一个批次的大小(一个批次有多少个图片张量) drop_last=True, # 丢弃最后无法称为一整个批次的数据 ) # 调用test函数进行算法性能评估