Example #1
    def __getitem__(self, index):
        if self.is_train:
            img, target = self.train_img[index], self.train_label[index]
            if len(img.shape) == 2:
                img = np.stack([img] * 3, 2)
            img = Image.fromarray(img, mode="RGB")
            img = transforms.Resize((256, 256), Image.BILINEAR)(img)
            img = transforms.RandomCrop(INPUT_SIZE)(img)
            img = transforms.RandomHorizontalFlip()(img)
            img = transforms.ToTensor()(img)
            img = transforms.Normalize([0.485, 0.456, 0.406],
                                       [0.229, 0.224, 0.225])(img)

        else:
            img, target = self.test_img[index], self.test_label[index]
            if len(img.shape) == 2:
                img = np.stack([img] * 3, 2)
            img = Image.fromarray(img, mode="RGB")
            img = transforms.Resize((256, 256), Image.BILINEAR)(img)
            img = transforms.CenterCrop(INPUT_SIZE)(img)
            img = transforms.ToTensor()(img)
            img = transforms.Normalize([0.485, 0.456, 0.406],
                                       [0.229, 0.224, 0.225])(img)

        return img, target
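The train and test branches differ only in the spatial augmentation (random crop plus flip vs. center crop); the grayscale handling is shared. A minimal standalone sketch of that conversion (INPUT_SIZE is assumed to be 224, since the snippet never defines it):

import numpy as np
from PIL import Image

INPUT_SIZE = 224  # assumed value; the snippet above does not define it

def to_rgb(img):
    # A single-channel (H, W) array is replicated to (H, W, 3) so that
    # 3-channel transforms and ImageNet normalization apply unchanged.
    if len(img.shape) == 2:
        img = np.stack([img] * 3, 2)
    return Image.fromarray(img, mode="RGB")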
Example #2

def train(args):

    joint_transform = transforms.Compose([
        transforms.RandomScale(),
        transforms.Mirror(),
        transforms.RandomCrop()
    ])
    trainset = datasets[args.dataset](mode=args.mode, root=args.dataset_root)

    net = models[args.g]
Example #3

    def __init__(self, train=True):
        self.root = './data/ESC50/ESC-50-master/audio/'
        self.train = train

        # Collect the names of all files from the training folds. ESC-50 file
        # names encode the fold as the first dash-separated field
        # (e.g. 1-100032-A-0.wav), which is what the filter below keys on.
        with open('./data/ESC50/ESC10_file_names.txt', 'r') as fid:
            temp = fid.read().split('\n')
        temp.sort()
        self.file_names = []
        folds = config.train_folds if train else config.test_fold
        for name in temp:
            # Guard against blank entries from a trailing newline.
            if name and int(name.split('-')[0]) in folds:
                self.file_names.append(name)

        if self.train:
            self.wave_transforms = torchvision.transforms.Compose([
                transforms.ToTensor1D(),
                transforms.RandomScale(max_scale=1.25),
                transforms.RandomPadding(out_len=220500),
                transforms.RandomCrop(out_len=220500)
            ])

            self.spec_transforms = torchvision.transforms.Compose([
                torchvision.transforms.ToTensor(),
                transforms.FrequencyMask(max_width=config.freq_masks_width,
                                         numbers=config.freq_masks),
                transforms.TimeMask(max_width=config.time_masks_width,
                                    numbers=config.time_masks)
            ])

        else:  #for test
            self.wave_transforms = torchvision.transforms.Compose([
                transforms.ToTensor1D(),
                transforms.RandomPadding(out_len=220500),
                transforms.RandomCrop(out_len=220500)
            ])

            self.spec_transforms = torchvision.transforms.Compose(
                [torchvision.transforms.ToTensor()])
Example #4

def get_transform(train=False):
    transforms = []

    if train:
        transforms.append(custom_T.RandomHorizontalFlip())
        transforms.append(custom_T.RandomCrop())

    transforms.append(custom_T.ToTensor())
    transforms.append(custom_T.FasterRCNNResizer())

    return custom_T.Compose(transforms)
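custom_T is a project-local transforms module, so this snippet is not runnable on its own. A plausible sketch of the convention such detection transforms follow (an assumption, not the module's actual code) is that every transform takes and returns an (image, target) pair, so geometric changes stay in sync with the boxes:

class Compose:
    """Thread an (image, target) pair through a list of paired transforms."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        for t in self.transforms:
            image, target = t(image, target)
        return image, target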
Example #5
    def __init__(self, root=None, dataloader=default_loader):
        self.transform1 = transforms.Compose([
            transforms.RandomRotation(30),
            transforms.Resize([256, 256]),
            transforms.RandomCrop(INPUT_SIZE),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(brightness=(0.9, 1.1),
                                   contrast=(0.9, 1.1),
                                   saturation=(0.9, 1.1)),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225)),
            transforms.RandomErasing(probability=0.5, sh=0.05)
        ])
        # Augmentation method 2: focus on smaller regions
        self.transform2 = transforms.Compose([
            transforms.RandomRotation(30),
            transforms.Resize([336, 336]),
            transforms.RandomCrop(INPUT_SIZE),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(brightness=(0.9, 1.1),
                                   contrast=(0.9, 1.1),
                                   saturation=(0.9, 1.1)),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225)),
            transforms.RandomErasing(probability=0.5, sh=0.05)
        ])
        self.dataloader = dataloader

        self.root = root
        with open(os.path.join(self.root, TRAIN_DATASET), 'r') as fid:
            self.imglist = fid.readlines()

        self.labels = []
        for line in self.imglist:
            image_path, label = line.strip().split()
            self.labels.append(int(label))
        self.labels = np.array(self.labels)
        self.labels = torch.LongTensor(self.labels)
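The two pipelines differ only in the resize scale before the shared crop size (256 vs. 336), so transform2 effectively zooms into a smaller region of each image. A hypothetical __getitem__ for this dataset (the original method is not shown) might return both views plus the label:

    def __getitem__(self, index):
        image_path, _ = self.imglist[index].strip().split()
        img = self.dataloader(image_path)
        # Two independently augmented views of the same image.
        return self.transform1(img), self.transform2(img), self.labels[index]

    def __len__(self):
        return len(self.labels)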
Example #6
    def __init__(self, train=True):
        self.root = './data/US8K/audio/'
        self.train = train

        # Only the fold and file name, e.g. 'fold2/4201-3-0-0.wav'.
        self.file_paths = []

        if train:
            for f in config.train_folds:
                file_names = os.listdir(self.root + 'fold' + str(f) + '/')
                for name in file_names:
                    if name.split('.')[-1] == 'wav':
                        self.file_paths.append('fold' + str(f) + '/' + name)
        else:
            test_fold = str(config.test_fold[0])
            file_names = os.listdir(self.root + 'fold' + test_fold + '/')
            for name in file_names:
                if name.split('.')[-1] == 'wav':
                    self.file_paths.append('fold' + test_fold + '/' + name)

        if self.train:
            self.wave_transforms = torchvision.transforms.Compose([
                transforms.ToTensor1D(),
                transforms.RandomScale(max_scale=1.25),
                transforms.RandomPadding(out_len=176400),
                transforms.RandomCrop(out_len=176400)
            ])

            self.spec_transforms = torchvision.transforms.Compose([
                torchvision.transforms.ToTensor(),
                transforms.FrequencyMask(max_width=config.freq_masks_width,
                                         numbers=config.freq_masks),
                transforms.TimeMask(max_width=config.time_masks_width,
                                    numbers=config.time_masks)
            ])

        else:  # for test
            self.wave_transforms = torchvision.transforms.Compose([
                transforms.ToTensor1D(),
                transforms.RandomPadding(out_len=176400),
                transforms.RandomCrop(out_len=176400)
            ])

            self.spec_transforms = torchvision.transforms.Compose(
                [torchvision.transforms.ToTensor()])
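FrequencyMask and TimeMask come from the project's own transforms module and implement SpecAugment-style masking on the spectrogram. A minimal sketch of the idea (an assumed implementation; only the parameter names are taken from the call above):

import torch

class TimeMask:
    """Zero out `numbers` random time stripes, each up to `max_width` frames wide."""

    def __init__(self, max_width, numbers):
        self.max_width = max_width
        self.numbers = numbers

    def __call__(self, spec):  # spec: (channels, freq_bins, time_frames)
        for _ in range(self.numbers):
            width = torch.randint(1, self.max_width + 1, (1,)).item()
            start = torch.randint(0, max(1, spec.shape[-1] - width), (1,)).item()
            spec[..., start:start + width] = 0.0
        return spec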
Example #7
def get_dataloader():
    # TODO(xwd): Adaptive normalization by some large image.
    # E.g. in medical image processing, WSI images are very large and differ
    # from ordinary images.

    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]

    train_transform = transform.Compose([
        transform.RandomScale([cfg['scale_min'], cfg['scale_max']]),
        transform.RandomRotate([cfg['rotate_min'], cfg['rotate_max']],
                               padding=mean,
                               ignore_label=cfg['ignore_label']),
        transform.RandomGaussianBlur(),
        transform.RandomHorizontallyFlip(),
        transform.RandomCrop([cfg['train_h'], cfg['train_w']],
                             crop_type='rand',
                             padding=mean,
                             ignore_label=cfg['ignore_label']),
        transform.ToTensor(),
        transform.Normalize(mean=mean, std=std)
    ])

    train_data = cityscapes.Cityscapes(cfg['data_path'],
                                       split='train',
                                       transform=train_transform)

    # Use a distributed sampler so each GPU loads a distinct shard of the
    # dataset and no samples are duplicated across ranks.
    train_sampler = DistributedSampler(train_data)

    train_loader = DataLoader(train_data,
                              batch_size=cfg['batch_size'] //
                              cfg['world_size'],
                              shuffle=(train_sampler is None),
                              num_workers=4,
                              pin_memory=True,
                              sampler=train_sampler,
                              drop_last=True)

    return train_loader, train_sampler
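One caveat with DistributedSampler: shuffling is driven by an epoch counter on the sampler, which is why the sampler is returned alongside the loader. The caller is expected to re-seed it every epoch, roughly like this (num_epochs is a placeholder):

train_loader, train_sampler = get_dataloader()
for epoch in range(num_epochs):
    # Without set_epoch, every epoch replays the same shuffled order.
    train_sampler.set_epoch(epoch)
    for batch in train_loader:
        ...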
Example #8

def get_transform(train=False, yolo=False, aug=None):
    assert aug in ('dirty_camera_lens', 'gan', None), "Aug parameter not valid"

    transforms = []

    if yolo:
        transforms.append(custom_T.PadToSquare())
        transforms.append(custom_T.Resize(img_size=None))

    if train:
        transforms.append(custom_T.RandomHorizontalFlip())
        transforms.append(custom_T.RandomCrop())

    if aug == 'dirty_camera_lens':
        print("Augmentation: Dirty Camera Lens")
        transforms.append(custom_T.DirtyCameraLens())

    transforms.append(custom_T.ToTensor())
    # transforms.append(custom_T.FasterRCNNResizer())

    return custom_T.Compose(transforms)
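Note that aug='gan' passes the assertion but no branch appends a corresponding transform, so it currently behaves like aug=None. Typical call sites would look like:

train_tf = get_transform(train=True, aug='dirty_camera_lens')
test_tf = get_transform(yolo=True)  # pad to square + resize, no augmentation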
Example #9
    def __init__(self, root=None, dataloader=default_loader):
        self.transform = transforms.Compose([
            transforms.Resize([256, 256]),
            transforms.RandomCrop(INPUT_SIZE),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225))
        ])
        self.dataloader = dataloader

        self.root = root
        self.imgs = []
        self.labels = []

        with open(os.path.join(self.root, EVAL_DATASET), 'r') as fid:
            for line in fid.readlines():
                img_path, label = line.strip().split()
                img = self.dataloader(img_path)
                label = int(label)
                self.imgs.append(img)
                self.labels.append(label)
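Unlike the other datasets here, this one decodes every image eagerly in __init__, trading memory for faster epochs. For large evaluation sets, a lazy variant is the usual alternative (a sketch; img_paths is a hypothetical attribute holding paths instead of decoded images):

    def __getitem__(self, index):
        img = self.dataloader(self.img_paths[index])  # decode on access
        return self.transform(img), self.labels[index]

    def __len__(self):
        return len(self.labels)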
Example #10

def main():
    global BEST_LOSS
    cudnn.benchmark = True
    start_epoch = cfg.OPOSE.start_epoch  # start from epoch 0 or last checkpoint epoch
    # Create ckpt & vis folder
    if not os.path.isdir(cfg.OPOSE.ckpt):
        os.makedirs(cfg.OPOSE.ckpt)
    if not os.path.exists(os.path.join(cfg.OPOSE.ckpt, 'vis')):
        os.makedirs(os.path.join(cfg.OPOSE.ckpt, 'vis'))
    if args.cfg_file is not None and not cfg.OPOSE.evaluate:
        shutil.copyfile(
            args.cfg_file,
            os.path.join(cfg.OPOSE.ckpt,
                         args.cfg_file.split('/')[-1]))
    model = pose_estimation.PoseModel(num_point=19,
                                      num_vector=19,
                                      pretrained=True)
    # # Calculate FLOPs & Param
    # n_flops, n_convops, n_params = measure_model(model, cfg.OPOSE.input_size, cfg.OPOSE.input_size)
    criterion = nn.MSELoss().cuda()
    # Dataset and Loader
    train_dataset = dataset.CocoOpenposeData(
        cfg,
        cfg.OPOSE.data_root,
        cfg.OPOSE.info_root,
        'train2017',
        transformer=transforms.Compose([
            transforms.RandomResized(),
            transforms.RandomRotate(40),
            transforms.RandomCrop(368),
            transforms.RandomHorizontalFlip(),
        ]))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=BATCH_SIZE,
                                               shuffle=True,
                                               num_workers=int(cfg.workers),
                                               pin_memory=True)
    if cfg.OPOSE.validate or cfg.OPOSE.evaluate:
        val_dataset = dataset.CocoOpenposeData(
            cfg,
            cfg.OPOSE.data_root,
            cfg.OPOSE.info_root,
            'val2017',
            transformer=transforms.Compose(
                [transforms.TestResized(cfg.OPOSE.input_size)]))
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=BATCH_SIZE,
                                                 shuffle=False,
                                                 num_workers=int(cfg.workers),
                                                 pin_memory=True)
    # Load nets into gpu
    if NUM_GPUS > 1:
        model = torch.nn.DataParallel(model, device_ids=gpu).cuda()
    # Set up optimizers
    params, multiple = get_parameters(model, cfg, False)

    optimizer = torch.optim.SGD(params,
                                cfg.OPOSE.base_lr,
                                momentum=cfg.OPOSE.momentum,
                                weight_decay=cfg.OPOSE.weight_decay)

    # Resume training
    title = 'Pytorch-OPOSE-{}-{}'.format(cfg.OPOSE.arch_encoder,
                                         cfg.OPOSE.arch_decoder)
    if cfg.OPOSE.resume:
        # Load checkpoint.
        print("==> Resuming from checkpoint '{}'".format(cfg.OPOSE.resume))
        assert os.path.isfile(
            cfg.OPOSE.resume), 'Error: checkpoint file not found!'
        ckpt = torch.load(cfg.OPOSE.resume)
        BEST_LOSS = ckpt['best_loss']
        start_epoch = ckpt['epoch']
        try:
            model.module.load_state_dict(ckpt['state_dict'])
        except AttributeError:
            # The checkpoint was saved without a DataParallel wrapper.
            model.load_state_dict(ckpt['state_dict'])

        optimizer.load_state_dict(ckpt['optimizer'])
        logger = Logger(os.path.join(cfg.OPOSE.ckpt, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(cfg.OPOSE.ckpt, 'log.txt'), title=title)
        logger.set_names(
            ['epoch', 'Learning Rate', 'Train Loss', 'Valid Loss'])

    # Train and val
    for epoch in range(start_epoch, cfg.OPOSE.epochs):
        print('\nEpoch: [{}/{}] | LR: {:.8f} '.format(epoch + 1,
                                                      cfg.OPOSE.epochs,
                                                      cfg.OPOSE.base_lr))
        train_loss = train(train_loader, model, criterion, optimizer, epoch,
                           USE_CUDA)
        if cfg.OPOSE.validate:
            test_loss = test(val_loader, model, criterion, optimizer, epoch,
                             USE_CUDA)
        else:
            test_loss = 0.0

        # Append logger file
        logger.append([epoch, cfg.OPOSE.base_lr, train_loss, test_loss])
        # Save model
        save_checkpoint(model, optimizer, test_loss, epoch)
        # Adjust learning rate
        adjust_learning_rate(optimizer, epoch)
        # Draw curve
        try:
            draw_curve('model', cfg.OPOSE.ckpt)
            print('==> Success saving log curve...')
        except Exception:
            print('==> Saving log curve error...')

    logger.close()
    try:
        savefig(os.path.join(cfg.OPOSE.ckpt, 'log.eps'))
        shutil.copyfile(
            os.path.join(cfg.OPOSE.ckpt, 'log.txt'),
            os.path.join(
                cfg.OPOSE.ckpt, 'log{}.txt'.format(
                    datetime.datetime.now().strftime('%Y%m%d%H%M%S'))))
    except Exception:
        print('Copy log error.')
    print('==> Training Done!')
    print('==> Best loss: {:.4f}'.format(BEST_LOSS))
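save_checkpoint and adjust_learning_rate are project helpers not shown in the snippet. For reference, a common shape for the latter is step decay (an assumed sketch, not this repository's code):

def adjust_learning_rate(optimizer, epoch, base_lr=1e-4, step=10, gamma=0.1):
    # Multiply the learning rate by `gamma` every `step` epochs.
    lr = base_lr * (gamma ** (epoch // step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr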
Example #11
def load_transform(args):
    '''
    Returns data transform for each rating scale.
    '''
    if args.vrs == 'mta':
        pre = [tfs.SwapAxes(1, 2)]
        mid_train = [
            tfs.CenterCrop(args.size_x + 10,
                           args.size_y + 10,
                           args.size_z + 6,
                           offset_x=args.offset_x,
                           offset_y=args.offset_y,
                           offset_z=args.offset_z),
            tfs.RandomCrop(args.size_x, args.size_y, args.size_z)
        ]
        mid_train_2 = [tfs.ReduceSlices(1, 2)]
        mid_test = [
            tfs.CenterCrop(args.size_x,
                           args.size_y,
                           args.size_z,
                           offset_x=args.offset_x,
                           offset_y=args.offset_y,
                           offset_z=args.offset_z)
        ]
        if args.arch == 'VGG_bl':  # for VGG baseline comparison in research paper
            post = [tfs.ToTensorFSL(), tfs.PerImageNormalization()]
        else:
            post = [
                tfs.ToTensorFSL(),
                tfs.PerImageNormalization(),
                tfs.Return5D(nc=args.nc)
            ]

        transform_train = Compose(
            compose_transform_parts(pre=pre, mid=mid_train, post=post))
        transform_train_x = Compose(
            compose_transform_parts(pre=pre,
                                    mid=mid_train,
                                    mid_2=mid_train_2,
                                    post=post))
        transform_test = Compose(
            compose_transform_parts(pre=pre, mid=mid_test, post=post))

    elif args.vrs == 'gca-f':
        pre = None
        mid_train = [  #tf.RotateVolume(1),tf.RotateVolume(0),
            tfs.CenterCrop(args.size_x + 10,
                           args.size_y + 10,
                           args.size_z + 6,
                           offset_x=args.offset_x,
                           offset_y=args.offset_y,
                           offset_z=args.offset_z),
            tfs.RandomCrop(args.size_x, args.size_y, args.size_z)
        ]
        mid_train_2 = [tfs.ReduceSlices(1, 2), tfs.RandomMirrorLR(0)]
        mid_train_2_x = [tfs.ReduceSlices(1, 3), tfs.RandomMirrorLR(0)]
        mid_test = [
            tfs.CenterCrop(args.size_x,
                           args.size_y,
                           args.size_z,
                           offset_x=args.offset_x,
                           offset_y=args.offset_y,
                           offset_z=args.offset_z),
            tfs.ReduceSlices(1, 2)
        ]
        if args.arch == 'VGG_bl':
            post = [tfs.ToTensorFSL(), tfs.PerImageNormalization()]
        else:
            post_train = [
                tfs.ToTensorFSL(),
                tfs.PerImageNormalization(),
                tfs.RandomNoise(noise_var=.05, p=.5),
                tfs.Return5D(nc=args.nc)
            ]
            post_test = [
                tfs.ToTensorFSL(),
                tfs.PerImageNormalization(),
                tfs.Return5D(nc=args.nc)
            ]

        transform_train = Compose(
            compose_transform_parts(pre=pre,
                                    mid=mid_train,
                                    mid_2=mid_train_2,
                                    post=post_train))
        transform_train_x = Compose(
            compose_transform_parts(pre=pre,
                                    mid=mid_train,
                                    mid_2=mid_train_2_x,
                                    post=post_train))
        transform_test = Compose(
            compose_transform_parts(pre=pre, mid=mid_test, post=post_test))

    elif args.vrs == 'pa':
        pre = None
        mid_train = [
            tfs.CenterCrop(args.size_x + 10,
                           args.size_y + 10,
                           args.size_z + 6,
                           offset_y=args.offset_y,
                           offset_x=args.offset_x,
                           offset_z=args.offset_z),
            tfs.RandomCrop(args.size_x, args.size_y, args.size_z)
        ]
        mid_test = [
            tfs.CenterCrop(args.size_x,
                           args.size_y,
                           args.size_z,
                           offset_y=args.offset_y,
                           offset_x=args.offset_x,
                           offset_z=args.offset_z)
        ]
        if args.arch == 'VGG_bl':
            post = [
                tfs.ToTensorFSL(),
                tfs.PerImageNormalization(),
                tfs.ReturnStackedPA(nc=args.nc, rnn=False)
            ]
        else:
            post = [
                tfs.ToTensorFSL(),
                tfs.PerImageNormalization(),
                tfs.ReturnStackedPA(nc=args.nc)
            ]
        transform_train = Compose(
            compose_transform_parts(pre=pre, mid=mid_train, post=post))
        transform_train_x = Compose(
            compose_transform_parts(pre=pre, mid=mid_train, post=post))
        transform_test = Compose(
            compose_transform_parts(pre=pre, mid=mid_test, post=post))
    else:
        raise ValueError('Unknown rating scale: {}'.format(args.vrs))

    return transform_train, transform_test, transform_train_x
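compose_transform_parts is not shown, but from its call sites it evidently concatenates the optional stage lists in order, skipping stages that are None. A sketch consistent with that usage:

def compose_transform_parts(pre=None, mid=None, mid_2=None, post=None):
    parts = []
    for stage in (pre, mid, mid_2, post):
        if stage is not None:
            parts.extend(stage)
    return parts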
Example #12
import utils.transforms as trans
from model.deeplab import DeepLab
import matplotlib.pyplot as plt
from tools import prediction
from utils.metrics import Evaluator
args = get_args()
rng = np.random.RandomState(seed=args.seed)

torch.manual_seed(seed=args.seed)

transform_train = trans.Compose([
    trans.RandomHorizontalFlip(),
    #trans.FixScale((args.crop_size,args.crop_size)),
    trans.RandomScale((0.5, 2.0)),
    #trans.FixScale(args.crop_size),
    trans.RandomCrop(args.crop_size),
    trans.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    trans.ToTensor(),
])

transform_val = trans.Compose([
    #trans.FixScale((args.crop_size,args.crop_size)),
    trans.FixScale(args.crop_size),
    trans.CenterCrop(args.crop_size),
    trans.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    trans.ToTensor(),
])
if args.aug:
    voc_train = VOCSegmentation(root='./data',
                                set_name='train',
                                transform=transform_train)
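Note the ordering: Normalize comes before ToTensor, the reverse of torchvision's convention, which only works because trans is a project-local joint-transform module for (image, mask) pairs. A Normalize consistent with that ordering might look like this (an assumption about the module, not its actual code):

import numpy as np

class Normalize:
    """Normalize the image of an (image, mask) pair before tensor conversion."""

    def __init__(self, mean, std):
        self.mean = np.array(mean)
        self.std = np.array(std)

    def __call__(self, img, mask):
        img = np.asarray(img).astype(np.float32) / 255.0
        return (img - self.mean) / self.std, mask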
Example #13
    def build_model(self):
        """ DataLoader """
        pad = int(30 * self.img_size // 256)
        train_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.Resize((self.img_size + pad, self.img_size + pad)),
            transforms.RandomCrop(self.img_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
        ])
        test_transform = transforms.Compose([
            transforms.Resize((self.img_size, self.img_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
        ])

        self.trainA = ImageFolder(
            os.path.join('dataset', self.dataset, 'trainA'), train_transform)
        self.trainB = ImageFolder(
            os.path.join('dataset', self.dataset, 'trainB'), train_transform)
        self.testA = ImageFolder(
            os.path.join('dataset', self.dataset, 'testA'), test_transform)
        self.testB = ImageFolder(
            os.path.join('dataset', self.dataset, 'testB'), test_transform)
        self.trainA_loader = DataLoader(self.trainA,
                                        batch_size=self.batch_size,
                                        shuffle=True)
        self.trainB_loader = DataLoader(self.trainB,
                                        batch_size=self.batch_size,
                                        shuffle=True)
        self.testA_loader = DataLoader(self.testA, batch_size=1, shuffle=False)
        self.testB_loader = DataLoader(self.testB, batch_size=1, shuffle=False)
        """ Define Generator, Discriminator """
        self.genA2B = ResnetGenerator(input_nc=3,
                                      output_nc=3,
                                      ngf=self.ch,
                                      n_blocks=self.n_res,
                                      img_size=self.img_size,
                                      light=self.light)
        self.genB2A = ResnetGenerator(input_nc=3,
                                      output_nc=3,
                                      ngf=self.ch,
                                      n_blocks=self.n_res,
                                      img_size=self.img_size,
                                      light=self.light)
        self.disGA = Discriminator(input_nc=3, ndf=self.ch, n_layers=7)
        self.disGB = Discriminator(input_nc=3, ndf=self.ch, n_layers=7)
        self.disLA = Discriminator(input_nc=3, ndf=self.ch, n_layers=5)
        self.disLB = Discriminator(input_nc=3, ndf=self.ch, n_layers=5)
        """ Define Loss """
        self.L1_loss = loss.L1Loss()
        self.MSE_loss = loss.MSELoss()
        self.BCE_loss = loss.BCEWithLogitsLoss()
        """ Trainer """
        def get_params(block):
            out = []
            for name, param in block.named_parameters():
                if 'instancenorm' in name or 'weight_u' in name or 'weight_v' in name:
                    continue
                out.append(param)
            return out

        genA2B_parameters = get_params(self.genA2B)
        genB2A_parameters = get_params(self.genB2A)
        disGA_parameters = get_params(self.disGA)
        disGB_parameters = get_params(self.disGB)
        disLA_parameters = get_params(self.disLA)
        disLB_parameters = get_params(self.disLB)
        G_parameters = genA2B_parameters + genB2A_parameters
        D_parameters = disGA_parameters + disGB_parameters + disLA_parameters + disLB_parameters
        self.G_optim = fluid.optimizer.Adam(
            parameter_list=G_parameters,
            learning_rate=self.lr,
            beta1=0.5,
            beta2=0.999,
            regularization=fluid.regularizer.L2Decay(self.weight_decay))
        self.D_optim = fluid.optimizer.Adam(
            parameter_list=D_parameters,
            learning_rate=self.lr,
            beta1=0.5,
            beta2=0.999,
            regularization=fluid.regularizer.L2Decay(self.weight_decay))
        """ Define Rho clipper to constraint the value of rho in AdaILN and ILN"""
Example #14
        fin.write(model_path + ' ' + str(epoch) + '\n')


# Dataset preparation
# ---------------------------------
print('Loading dataset...')
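# Resize to roughly 8/7 of the crop size (the familiar 256 -> 224 margin;
# 402 is approximately 352 * 8/7) before random-cropping to the training size.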
cache_size = 256
if args.image_size == 448:
    cache_size = 256 * 2
if args.image_size == 352:
    cache_size = 402
transform_train = transforms.Compose([
    transforms.Resize((cache_size, cache_size)),
    #transforms.Resize((args.image_size, args.image_size)),
    #transforms.RandomRotation(10),
    transforms.RandomCrop(args.image_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

transform_test = transforms.Compose([
    transforms.Resize((cache_size, cache_size)),
    transforms.CenterCrop(args.image_size),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])


print("Dataset Initializing...")
trainset = SRDataset.SRDataset(max_person=args.max_person,image_dir=args.images_root, \
Example #15
def generate_dataloader(model, eval_type, args):
    cache_size = 256
    if args.image_size == 448:
        cache_size = 256 * 2
    if args.image_size == 352:
        cache_size = 402

    transform_train = transforms.Compose([
        transforms.Resize((cache_size, cache_size)),
        transforms.RandomCrop(args.image_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    transform_test = transforms.Compose([
        transforms.Resize((cache_size, cache_size)),
        transforms.CenterCrop(args.image_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    if eval_type == 'train':
        data = ImageDataset(max_person=args.max_person + 1,
                            image_dir=args.images_root,
                            images_list=args.train_file_pre + '_images.txt',
                            bboxes_list=args.train_file_pre + '_bbox.json',
                            image_size=args.image_size,
                            input_transform=transform_train)
    elif eval_type == 'valid':
        data = ImageDataset(max_person=args.max_person + 1,
                            image_dir=args.images_root,
                            images_list=args.valid_file_pre + '_images.txt',
                            bboxes_list=args.valid_file_pre + '_bbox.json',
                            image_size=args.image_size,
                            input_transform=transform_test)
    else:
        data = ImageDataset(max_person=args.max_person + 1,
                            image_dir=args.images_root,
                            images_list=args.test_file_pre + '_images.txt',
                            bboxes_list=args.test_file_pre + '_bbox.json',
                            image_size=args.image_size,
                            input_transform=transform_test)
    loader = torch.utils.data.DataLoader(
        data,
        batch_size=args.test_batch_size,
        shuffle=False,
        num_workers=0,
        # np.random.seed(...) returns None, so passing its result (as the
        # original did) sets no per-worker seed; use a lambda instead.
        worker_init_fn=lambda worker_id: np.random.seed(args.manualSeed +
                                                        worker_id))

    model.eval()
    batch_time = AverageMeter.AverageMeter()
    end_time = time.time()

    union_filename = './para_data/' + args.dataset + '_' + eval_type + '_union' + '.npy'
    feat_filename = './para_data/' + args.dataset + '_' + eval_type + '_ssl.npy'
    ssl_model = load_ssl_model()
    ssl_model.eval()
    if args.cuda:
        ssl_model.cuda()
        ssl_model = torch.nn.DataParallel(ssl_model)

    if os.path.exists(feat_filename) and os.path.exists(
            union_filename) and not args.regenerate_roifeat:
        all_feat = np.load(feat_filename)
        logger.info('loading RoI feature npy from {} successfully'.format(
            feat_filename))
        all_union_feat = np.load(union_filename,
                                 mmap_mode='r')  # [B, N, N, 2048]
        logger.info(
            'loading union npy from {} successfully'.format(union_filename))
    else:
        all_feat, all_union_feat = [], []
        for batch_idx, (img, image_bboxes) in enumerate(loader):
            if args.cuda:
                img, image_bboxes = img.cuda(), image_bboxes.cuda()
            img, image_bboxes = Variable(img), Variable(image_bboxes)
            node_num = image_bboxes.shape[1]

            union_boxes = get_union_box(image_bboxes)
            if args.cuda:
                union_boxes = union_boxes.cuda()
            image_bboxes = torch.cat((image_bboxes, union_boxes), dim=1)
            del union_boxes
            image_bboxes = Variable(image_bboxes)

            # [batcn, node_num, 2048]
            rois_feature_all = model(img, image_bboxes)
            feature_num = rois_feature_all.shape[2]
            rois_feature = rois_feature_all[:, :node_num, :]
            union_feature = rois_feature_all[:, node_num:, :].reshape(
                -1, node_num, node_num, feature_num)
            if args.load_ssl_model:
                img_feat = ssl_model(
                    img, image_bboxes)  # [batch_size, max_person+1, feat_dim]
                rois_feature = torch.cat((rois_feature, img_feat[:, -1:, :]),
                                         dim=1)

            all_feat.append(rois_feature.cpu().data.numpy())
            all_union_feat.append(union_feature.cpu().data.numpy())

            batch_time.update(time.time() - end_time)
            end_time = time.time()

            if batch_idx % args.print_freq == 0:
                logger.info('%s Epoch: [%d/%d]  '
                            'Time %.3f (%.3f)\t' %
                            (eval_type, batch_idx, len(loader), batch_time.val,
                             batch_time.avg))

        all_feat = np.concatenate(all_feat)
        all_union_feat = np.concatenate(all_union_feat)
        np.save(feat_filename, all_feat)
        np.save(union_filename, all_union_feat)

    class_weight, class_count = [], []

    if eval_type == 'train':
        dataset = SRDataset(all_feat, all_union_feat,
                            max_person=args.max_person + 1,
                            image_dir=args.images_root,
                            images_list=args.train_file_pre + '_images.txt',
                            relations_list=args.train_file_pre + '_relation.json',
                            image_size=args.image_size)
        class_weight, class_count = dataset.class_weight()
        batch_size = args.batch_size
        is_shuffle = True
    elif eval_type == 'valid':
        dataset = SRDataset(all_feat, all_union_feat,
                            max_person=args.max_person + 1,
                            image_dir=args.images_root,
                            images_list=args.valid_file_pre + '_images.txt',
                            relations_list=args.valid_file_pre + '_relation.json',
                            image_size=args.image_size)
        batch_size = args.test_batch_size
        is_shuffle = False
    else:
        dataset = SRDataset(all_feat, all_union_feat,
                            max_person=args.max_person + 1,
                            image_dir=args.images_root,
                            images_list=args.test_file_pre + '_images.txt',
                            relations_list=args.test_file_pre + '_relation.json',
                            image_size=args.image_size)
        batch_size = args.test_batch_size
        is_shuffle = False
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=is_shuffle,
        num_workers=args.num_workers,
        # Same fix as above: seed each worker via a lambda.
        worker_init_fn=lambda worker_id: np.random.seed(args.manualSeed +
                                                        worker_id))

    return dataloader, class_weight, class_count
Example #16
    dataset_root_path_1 = ""
    dataset_root_path_2 = ""
    split = "train"
    percentage = 15
    NUM_WORKERS = 0
    BATCH_SIZE = 4
    TGT_IMAGES_IN_BATCH = 1
    DEVICE = "cuda"
    DATASET_NAME_1 = ""
    DATASET_NAME_2 = ""

    transforms = None
    if split == "train":
        transforms = custom_T.Compose([
            custom_T.RandomHorizontalFlip(),
            custom_T.RandomCrop(),
            custom_T.ToTensor(),
            custom_T.FasterRCNNResizer()
        ])
    elif split == "val" or split == "test":
        transforms = custom_T.Compose(
            [custom_T.ToTensor(),
             custom_T.FasterRCNNResizer()])

    dataset_1 = CustomYoloAnnotatedDataset(dataset_root_path_1,
                                           transforms=transforms,
                                           dataset_name=DATASET_NAME_1,
                                           percentage=percentage,
                                           split=split)
    dataset_2 = CustomYoloAnnotatedDataset(dataset_root_path_2,
                                           transforms=transforms,