def __getitem__(self, index):
    """Return the preprocessed (image_tensor, label) pair at *index*.

    Training samples get random crop + horizontal flip augmentation;
    test samples get a deterministic center crop. Images are expected to
    be numpy arrays (grayscale 2-D arrays are tiled to 3 channels).

    Refactor note: the grayscale conversion, resize, ToTensor and
    Normalize steps were duplicated verbatim in both branches; they are
    now shared, with only the crop/flip stage split by mode.
    """
    if self.is_train:
        img, target = self.train_img[index], self.train_label[index]
    else:
        img, target = self.test_img[index], self.test_label[index]
    # Grayscale -> 3-channel so the RGB pipeline below applies uniformly.
    if len(img.shape) == 2:
        img = np.stack([img] * 3, 2)
    img = Image.fromarray(img, mode="RGB")
    img = transforms.Resize((256, 256), Image.BILINEAR)(img)
    if self.is_train:
        # Stochastic augmentation for training.
        img = transforms.RandomCrop(INPUT_SIZE)(img)
        img = transforms.RandomHorizontalFlip()(img)
    else:
        # Deterministic crop for evaluation.
        img = transforms.CenterCrop(INPUT_SIZE)(img)
    img = transforms.ToTensor()(img)
    # ImageNet channel statistics.
    img = transforms.Normalize([0.485, 0.456, 0.406],
                               [0.229, 0.224, 0.225])(img)
    return img, target
def train(args):
    """Set up training for the selected generator on the chosen dataset.

    `args` is expected to carry at least `dataset` (key into the global
    `datasets` registry), `mode`, `dataset_root` and `g` (key into the
    global `models` registry) -- inferred from usage here; confirm against
    the argument parser.
    """
    # Spatial augmentations applied jointly (image + target together).
    joint_transform = transforms.Compose([
        transforms.RandomScale(),
        transforms.Mirror(),
        transforms.RandomCrop()
    ])
    # Dataset class is looked up from a registry keyed by name.
    trainset = datasets[args.dataset](mode=args.mode, root=args.dataset_root)
    # Generator network looked up from the model registry.
    net = models[args.g]
def __init__(self, train=True):
    """Index the ESC-10 subset of ESC-50 for the requested split.

    Args:
        train: when True, keep files whose fold is in ``config.train_folds``;
            otherwise keep files whose fold is in ``config.test_fold``.
    """
    self.root = './data/ESC50/ESC-50-master/audio/'
    self.train = train
    # Read the names of all ESC-10 files. The context manager closes the
    # handle deterministically (the original `open(...).read()` leaked it).
    with open('./data/ESC50/ESC10_file_names.txt', 'r') as name_file:
        temp = name_file.read().split('\n')
    temp.sort()
    # ESC file names start with their fold number (e.g. '1-100032-A-0.wav');
    # keep only the folds belonging to this split.
    folds = config.train_folds if train else config.test_fold
    self.file_names = [
        name for name in temp if int(name.split('-')[0]) in folds
    ]
    if self.train:
        # Waveform pipeline: random time-scale, then pad/crop to a fixed
        # 220500 samples (= 5 s at 44.1 kHz).
        self.wave_transforms = torchvision.transforms.Compose([
            transforms.ToTensor1D(),
            transforms.RandomScale(max_scale=1.25),
            transforms.RandomPadding(out_len=220500),
            transforms.RandomCrop(out_len=220500)
        ])
        # Spectrogram pipeline: SpecAugment-style frequency/time masking.
        self.spec_transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            transforms.FrequencyMask(max_width=config.freq_masks_width,
                                     numbers=config.freq_masks),
            transforms.TimeMask(max_width=config.time_masks_width,
                                numbers=config.time_masks)
        ])
    else:  # test split: fixed-length waveform, no spectrogram masking
        self.wave_transforms = torchvision.transforms.Compose([
            transforms.ToTensor1D(),
            transforms.RandomPadding(out_len=220500),
            transforms.RandomCrop(out_len=220500)
        ])
        self.spec_transforms = torchvision.transforms.Compose(
            [torchvision.transforms.ToTensor()])
def get_transform(train=False):
    """Assemble the detection preprocessing pipeline.

    Training adds random horizontal flip and random crop in front of the
    mandatory ToTensor + FasterRCNNResizer steps.
    """
    augmentation = []
    if train:
        augmentation = [custom_T.RandomHorizontalFlip(), custom_T.RandomCrop()]
    core = [custom_T.ToTensor(), custom_T.FasterRCNNResizer()]
    return custom_T.Compose(augmentation + core)
def __init__(self, root=None, dataloader=default_loader):
    """Load the training image list and build two augmentation pipelines.

    The two pipelines share every step; only the pre-crop resize differs
    (256 vs 336), so the second one effectively looks at smaller regions.
    """
    def _make_pipeline(resize_to):
        # Shared augmentation recipe, parameterized by the resize edge.
        return transforms.Compose([
            transforms.RandomRotation(30),
            transforms.Resize([resize_to, resize_to]),
            transforms.RandomCrop(INPUT_SIZE),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(brightness=(0.9, 1.1),
                                   contrast=(0.9, 1.1),
                                   saturation=(0.9, 1.1)),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225)),
            transforms.RandomErasing(probability=0.5, sh=0.05)
        ])

    # Augmentation recipe 1: full-image context.
    self.transform1 = _make_pipeline(256)
    # Augmentation recipe 2: focus on smaller regions (larger pre-crop resize).
    self.transform2 = _make_pipeline(336)
    self.dataloader = dataloader
    self.root = root
    with open(os.path.join(self.root, TRAIN_DATASET), 'r') as fid:
        self.imglist = fid.readlines()
    parsed_labels = []
    for entry in self.imglist:
        _image_path, label_str = entry.strip().split()
        parsed_labels.append(int(label_str))
    self.labels = np.array(parsed_labels)
    self.labels = torch.LongTensor(self.labels)
def __init__(self, train=True):
    """Index UrbanSound8K .wav files for the requested split and build the
    waveform / spectrogram transform pipelines.

    Training uses all folds in ``config.train_folds``; testing uses the
    single fold ``config.test_fold[0]``.
    """
    self.root = './data/US8K/audio/'
    self.train = train
    # Relative paths like 'fold2/4201-3-0-0.wav' (fold dir + file name).
    self.file_paths = []
    fold_ids = config.train_folds if train else [config.test_fold[0]]
    for fold_id in fold_ids:
        fold_prefix = 'fold' + str(fold_id) + '/'
        for name in os.listdir(self.root + fold_prefix):
            if name.split('.')[-1] == 'wav':
                self.file_paths.append(fold_prefix + name)
    if self.train:
        # Waveform: random time-scale then pad/crop to 176400 samples.
        self.wave_transforms = torchvision.transforms.Compose([
            transforms.ToTensor1D(),
            transforms.RandomScale(max_scale=1.25),
            transforms.RandomPadding(out_len=176400),
            transforms.RandomCrop(out_len=176400)
        ])
        # Spectrogram: SpecAugment-style frequency/time masking.
        self.spec_transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            transforms.FrequencyMask(max_width=config.freq_masks_width,
                                     numbers=config.freq_masks),
            transforms.TimeMask(max_width=config.time_masks_width,
                                numbers=config.time_masks)
        ])
    else:  # test: fixed-length waveform, no masking
        self.wave_transforms = torchvision.transforms.Compose([
            transforms.ToTensor1D(),
            transforms.RandomPadding(out_len=176400),
            transforms.RandomCrop(out_len=176400)
        ])
        self.spec_transforms = torchvision.transforms.Compose(
            [torchvision.transforms.ToTensor()])
def get_dataloader():
    """Create the distributed Cityscapes training loader and its sampler."""
    # TODO(xwd): Adaptive normalization by some large image.
    # E.g. in medical image processing, WSI images are very large and
    # differ from ordinary images.
    value_scale = 255
    mean = [channel * value_scale for channel in (0.485, 0.456, 0.406)]
    std = [channel * value_scale for channel in (0.229, 0.224, 0.225)]
    augmentation_steps = [
        transform.RandomScale([cfg['scale_min'], cfg['scale_max']]),
        transform.RandomRotate([cfg['rotate_min'], cfg['rotate_max']],
                               padding=mean,
                               ignore_label=cfg['ignore_label']),
        transform.RandomGaussianBlur(),
        transform.RandomHorizontallyFlip(),
        transform.RandomCrop([cfg['train_h'], cfg['train_w']],
                             crop_type='rand',
                             padding=mean,
                             ignore_label=cfg['ignore_label']),
        transform.ToTensor(),
        transform.Normalize(mean=mean, std=std),
    ]
    train_transform = transform.Compose(augmentation_steps)
    train_data = cityscapes.Cityscapes(cfg['data_path'],
                                       split='train',
                                       transform=train_transform)
    # Each GPU loads its own shard of the dataset to avoid redundant samples.
    train_sampler = DistributedSampler(train_data)
    train_loader = DataLoader(
        train_data,
        batch_size=cfg['batch_size'] // cfg['world_size'],
        shuffle=(train_sampler is None),  # sampler is always set, so False
        num_workers=4,
        pin_memory=True,
        sampler=train_sampler,
        drop_last=True)
    return train_loader, train_sampler
def get_transform(train=False, yolo=False, aug=None):
    """Build the detection preprocessing/augmentation pipeline.

    Args:
        train: add random horizontal flip + random crop when True.
        yolo: pad to square and resize (YOLO-style input) when True.
        aug: optional extra augmentation, 'dirty_camera_lens' or 'gan'
            ('gan' selects no extra transform here), or None.

    Returns:
        custom_T.Compose wrapping the selected transforms.

    Raises:
        ValueError: if *aug* is not one of the supported values.
    """
    # Validate with a real exception: `assert` statements are stripped under
    # `python -O`, so they must not guard argument validity.
    if aug not in (None, 'dirty_camera_lens', 'gan'):
        raise ValueError("Aug parameter not valid")
    transforms = []
    if yolo:
        transforms.append(custom_T.PadToSquare())
        transforms.append(custom_T.Resize(img_size=None))
    if train:
        transforms.append(custom_T.RandomHorizontalFlip())
        transforms.append(custom_T.RandomCrop())
    if aug == 'dirty_camera_lens':
        print("Augmentation: Dirty Camera Lens")
        transforms.append(custom_T.DirtyCameraLens())
    transforms.append(custom_T.ToTensor())
    # transforms.append(custom_T.FasterRCNNResizer())
    return custom_T.Compose(transforms)
def __init__(self, root=None, dataloader=default_loader):
    """Eagerly decode every image of the evaluation list into memory.

    NOTE(review): the eval pipeline uses RandomCrop, which makes
    evaluation non-deterministic — confirm this is intended (e.g. for
    multi-crop evaluation) rather than a copy-paste from the train side.
    """
    self.transform = transforms.Compose([
        transforms.Resize([256, 256]),
        transforms.RandomCrop(INPUT_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))
    ])
    self.dataloader = dataloader
    self.root = root
    self.imgs = []
    self.labels = []
    # Each line of the list file is "<image_path> <integer_label>".
    with open(os.path.join(self.root, EVAL_DATASET), 'r') as fid:
        for entry in fid.readlines():
            img_path, label_str = entry.strip().split()
            self.imgs.append(self.dataloader(img_path))
            self.labels.append(int(label_str))
def main():
    """Entry point: build the OpenPose model, loaders and optimizer,
    optionally resume from a checkpoint, then run the train/val loop.

    Relies on module-level state (``cfg``, ``args``, ``BATCH_SIZE``,
    ``NUM_GPUS``, ``USE_CUDA``, ``gpu``, ``BEST_LOSS``) and helpers
    (``train``, ``test``, ``save_checkpoint``, ``adjust_learning_rate``,
    ``draw_curve``, ``get_parameters``, ``Logger``).
    """
    global BEST_LOSS
    cudnn.benchmark = True
    start_epoch = cfg.OPOSE.start_epoch  # start from epoch 0 or last checkpoint epoch
    # Create ckpt & vis folders.
    if not os.path.isdir(cfg.OPOSE.ckpt):
        os.makedirs(cfg.OPOSE.ckpt)
    if not os.path.exists(os.path.join(cfg.OPOSE.ckpt, 'vis')):
        os.makedirs(os.path.join(cfg.OPOSE.ckpt, 'vis'))
    # Keep a copy of the config next to the checkpoints for provenance.
    if args.cfg_file is not None and not cfg.OPOSE.evaluate:
        shutil.copyfile(
            args.cfg_file,
            os.path.join(cfg.OPOSE.ckpt, args.cfg_file.split('/')[-1]))
    model = pose_estimation.PoseModel(num_point=19,
                                      num_vector=19,
                                      pretrained=True)
    # # Calculate FLOPs & Param
    # n_flops, n_convops, n_params = measure_model(model, cfg.OPOSE.input_size, cfg.OPOSE.input_size)
    criterion = nn.MSELoss().cuda()
    # Dataset and Loader
    train_dataset = dataset.CocoOpenposeData(
        cfg, cfg.OPOSE.data_root, cfg.OPOSE.info_root, 'train2017',
        transformer=transforms.Compose([
            transforms.RandomResized(),
            transforms.RandomRotate(40),
            transforms.RandomCrop(368),
            transforms.RandomHorizontalFlip(),
        ]))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=BATCH_SIZE,
                                               shuffle=True,
                                               num_workers=int(cfg.workers),
                                               pin_memory=True)
    if cfg.OPOSE.validate or cfg.OPOSE.evaluate:
        val_dataset = dataset.CocoOpenposeData(
            cfg, cfg.OPOSE.data_root, cfg.OPOSE.info_root, 'val2017',
            transformer=transforms.Compose(
                [transforms.TestResized(cfg.OPOSE.input_size)]))
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=BATCH_SIZE,
                                                 shuffle=False,
                                                 num_workers=int(cfg.workers),
                                                 pin_memory=True)
    # Load nets into gpu
    if NUM_GPUS > 1:
        model = torch.nn.DataParallel(model, device_ids=gpu).cuda()
    # Set up optimizer
    params, multiple = get_parameters(model, cfg, False)
    optimizer = torch.optim.SGD(params,
                                cfg.OPOSE.base_lr,
                                momentum=cfg.OPOSE.momentum,
                                weight_decay=cfg.OPOSE.weight_decay)
    # Resume training
    title = 'Pytorch-OPOSE-{}-{}'.format(cfg.OPOSE.arch_encoder,
                                         cfg.OPOSE.arch_decoder)
    if cfg.OPOSE.resume:
        # Load checkpoint.
        print("==> Resuming from checkpoint '{}'".format(cfg.OPOSE.resume))
        assert os.path.isfile(
            cfg.OPOSE.resume), 'Error: no checkpoint directory found!'
        ckpt = torch.load(cfg.OPOSE.resume)
        BEST_LOSS = ckpt['best_loss']
        start_epoch = ckpt['epoch']
        # DataParallel wraps the net in `.module`; fall back to the bare
        # model when the checkpoint/model lacks the wrapper.
        try:
            model.module.load_state_dict(ckpt['state_dict'])
        except Exception:  # narrowed from a bare `except:` so Ctrl-C escapes
            model.load_state_dict(ckpt['state_dict'])
        optimizer.load_state_dict(ckpt['optimizer'])
        logger = Logger(os.path.join(cfg.OPOSE.ckpt, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(cfg.OPOSE.ckpt, 'log.txt'), title=title)
        logger.set_names(
            ['epoch', 'Learning Rate', 'Train Loss', 'Valid Loss'])
    # Train and val
    for epoch in range(start_epoch, cfg.OPOSE.epochs):
        print('\nEpoch: [{}/{}] | LR: {:.8f} '.format(epoch + 1,
                                                      cfg.OPOSE.epochs,
                                                      cfg.OPOSE.base_lr))
        train_loss = train(train_loader, model, criterion, optimizer, epoch,
                           USE_CUDA)
        if cfg.OPOSE.validate:
            test_loss = test(val_loader, model, criterion, optimizer, epoch,
                             USE_CUDA)
        else:
            # BUGFIX: was `test_loss = 0.0, 0.0`, which silently built a
            # TUPLE and corrupted the scalar 'Valid Loss' logger column and
            # the value passed to save_checkpoint below.
            test_loss = 0.0
        # Append logger file
        logger.append([epoch, cfg.OPOSE.base_lr, train_loss, test_loss])
        # Save model
        save_checkpoint(model, optimizer, test_loss, epoch)
        # Adjust learning rate
        adjust_learning_rate(optimizer, epoch)
        # Draw curve — best effort; a plotting failure must not stop training.
        try:
            draw_curve('model', cfg.OPOSE.ckpt)
            print('==> Success saving log curve...')
        except Exception:  # narrowed from a bare `except:`
            print('==> Saving log curve error...')
    logger.close()
    try:
        savefig(os.path.join(cfg.OPOSE.ckpt, 'log.eps'))
        # Archive the log with a timestamped name.
        shutil.copyfile(
            os.path.join(cfg.OPOSE.ckpt, 'log.txt'),
            os.path.join(
                cfg.OPOSE.ckpt, 'log{}.txt'.format(
                    datetime.datetime.now().strftime('%Y%m%d%H%M%S'))))
    except Exception:  # narrowed from a bare `except:`
        print('Copy log error.')
    print('==> Training Done!')
    print('==> Best acc: {:.4f}%'.format(BEST_LOSS))
def load_transform(args):
    """Build data transforms for the rating scale selected by ``args.vrs``.

    Args:
        args: parsed CLI namespace; uses ``vrs`` (rating scale: 'mta',
            'gca-f' or 'pa'), ``size_x/y/z``, ``offset_x/y/z``, ``arch``
            and ``nc``.

    Returns:
        Tuple ``(transform_train, transform_test, transform_train_x)``.

    Raises:
        ValueError: for an unsupported ``args.vrs`` (previously this fell
            through and crashed with an UnboundLocalError at the return).
    """
    if args.vrs == 'mta':
        pre = [tfs.SwapAxes(1, 2)]
        # Train: center-crop a slightly larger volume, then random-crop down
        # to the target size for spatial jitter.
        mid_train = [
            tfs.CenterCrop(args.size_x + 10,
                           args.size_y + 10,
                           args.size_z + 6,
                           offset_x=args.offset_x,
                           offset_y=args.offset_y,
                           offset_z=args.offset_z),
            tfs.RandomCrop(args.size_x, args.size_y, args.size_z)
        ]
        mid_train_2 = [tfs.ReduceSlices(1, 2)]
        mid_test = [
            tfs.CenterCrop(args.size_x,
                           args.size_y,
                           args.size_z,
                           offset_x=args.offset_x,
                           offset_y=args.offset_y,
                           offset_z=args.offset_z)
        ]
        if args.arch == 'VGG_bl':
            # VGG baseline comparison in research paper: no 5D reshape.
            post = [tfs.ToTensorFSL(), tfs.PerImageNormalization()]
        else:
            post = [
                tfs.ToTensorFSL(),
                tfs.PerImageNormalization(),
                tfs.Return5D(nc=args.nc)
            ]
        transform_train = Compose(
            compose_transform_parts(pre=pre, mid=mid_train, post=post))
        transform_train_x = Compose(
            compose_transform_parts(pre=pre,
                                    mid=mid_train,
                                    mid_2=mid_train_2,
                                    post=post))
        transform_test = Compose(
            compose_transform_parts(pre=pre, mid=mid_test, post=post))
    elif args.vrs == 'gca-f':
        pre = None
        mid_train = [
            tfs.CenterCrop(args.size_x + 10,
                           args.size_y + 10,
                           args.size_z + 6,
                           offset_x=args.offset_x,
                           offset_y=args.offset_y,
                           offset_z=args.offset_z),
            tfs.RandomCrop(args.size_x, args.size_y, args.size_z)
        ]
        # The two train variants differ only in the slice-reduction stride.
        mid_train_2 = [tfs.ReduceSlices(1, 2), tfs.RandomMirrorLR(0)]
        mid_train_2_x = [tfs.ReduceSlices(1, 3), tfs.RandomMirrorLR(0)]
        mid_test = [
            tfs.CenterCrop(args.size_x,
                           args.size_y,
                           args.size_z,
                           offset_x=args.offset_x,
                           offset_y=args.offset_y,
                           offset_z=args.offset_z),
            tfs.ReduceSlices(1, 2)
        ]
        if args.arch == 'VGG_bl':
            # BUGFIX: this branch previously assigned ``post`` (never read),
            # while the Compose calls below reference ``post_train`` and
            # ``post_test`` — a guaranteed NameError for VGG_bl on this
            # scale. Use the baseline post-processing for both.
            post_train = [tfs.ToTensorFSL(), tfs.PerImageNormalization()]
            post_test = [tfs.ToTensorFSL(), tfs.PerImageNormalization()]
        else:
            post_train = [
                tfs.ToTensorFSL(),
                tfs.PerImageNormalization(),
                tfs.RandomNoise(noise_var=.05, p=.5),
                tfs.Return5D(nc=args.nc)
            ]
            post_test = [
                tfs.ToTensorFSL(),
                tfs.PerImageNormalization(),
                tfs.Return5D(nc=args.nc)
            ]
        transform_train = Compose(
            compose_transform_parts(pre=pre,
                                    mid=mid_train,
                                    mid_2=mid_train_2,
                                    post=post_train))
        transform_train_x = Compose(
            compose_transform_parts(pre=pre,
                                    mid=mid_train,
                                    mid_2=mid_train_2_x,
                                    post=post_train))
        transform_test = Compose(
            compose_transform_parts(pre=pre, mid=mid_test, post=post_test))
    elif args.vrs == 'pa':
        pre = None
        mid_train = [
            tfs.CenterCrop(args.size_x + 10,
                           args.size_y + 10,
                           args.size_z + 6,
                           offset_y=args.offset_y,
                           offset_x=args.offset_x,
                           offset_z=args.offset_z),
            tfs.RandomCrop(args.size_x, args.size_y, args.size_z)
        ]
        mid_test = [
            tfs.CenterCrop(args.size_x,
                           args.size_y,
                           args.size_z,
                           offset_y=args.offset_y,
                           offset_x=args.offset_x,
                           offset_z=args.offset_z)
        ]
        if args.arch == 'VGG_bl':
            post = [
                tfs.ToTensorFSL(),
                tfs.PerImageNormalization(),
                tfs.ReturnStackedPA(nc=args.nc, rnn=False)
            ]
        else:
            post = [
                tfs.ToTensorFSL(),
                tfs.PerImageNormalization(),
                tfs.ReturnStackedPA(nc=args.nc)
            ]
        transform_train = Compose(
            compose_transform_parts(pre=pre, mid=mid_train, post=post))
        # NOTE: for 'pa' the extra train variant is identical to the main one.
        transform_train_x = Compose(
            compose_transform_parts(pre=pre, mid=mid_train, post=post))
        transform_test = Compose(
            compose_transform_parts(pre=pre, mid=mid_test, post=post))
    else:
        raise ValueError('Unsupported rating scale: {!r}'.format(args.vrs))
    return transform_train, transform_test, transform_train_x
# Project imports for the DeepLab segmentation training script.
import utils.transforms as trans
from model.deeplab import DeepLab
import matplotlib.pyplot as plt
from tools import prediction
from utils.metrics import Evaluator

args = get_args()
# Seed NumPy and torch RNGs for reproducible augmentation/initialization.
rng = np.random.RandomState(seed=args.seed)
torch.manual_seed(seed=args.seed)
# Training augmentation: flip, scale jitter in [0.5, 2.0], random crop.
# NOTE(review): Normalize is applied BEFORE ToTensor here; these are custom
# transforms from utils.transforms, so confirm that ordering is intended.
transform_train = trans.Compose([
    trans.RandomHorizontalFlip(),
    #trans.FixScale((args.crop_size,args.crop_size)),
    trans.RandomScale((0.5, 2.0)),
    #trans.FixScale(args.crop_size),
    trans.RandomCrop(args.crop_size),
    trans.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    trans.ToTensor(),
])
# Validation: deterministic fixed scale + center crop.
transform_val = trans.Compose([
    #trans.FixScale((args.crop_size,args.crop_size)),
    trans.FixScale(args.crop_size),
    trans.CenterCrop(args.crop_size),
    trans.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    trans.ToTensor(),
])
# Build the augmented PASCAL VOC training set when --aug is enabled.
if (args.aug == True):
    voc_train = VOCSegmentation(root='./data',
                                set_name='train',
                                transform=transform_train)
def build_model(self):
    """Build datasets/loaders, U-GAT-IT-style networks, losses and optimizers.

    Populates: trainA/trainB/testA/testB datasets and their DataLoaders,
    genA2B/genB2A generators, global (disGA/disGB, 7 layers) and local
    (disLA/disLB, 5 layers) discriminators, L1/MSE/BCE losses, and the
    Adam optimizers G_optim / D_optim (PaddlePaddle fluid API).
    """
    # Train transform: flip, pad-resize (padding scales with img_size),
    # random crop back to img_size; Normalize(0.5, 0.5) maps pixels from
    # [0, 1] to [-1, 1].
    pad = int(30 * self.img_size // 256)
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.Resize((self.img_size + pad, self.img_size + pad)),
        transforms.RandomCrop(self.img_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])
    test_transform = transforms.Compose([
        transforms.Resize((self.img_size, self.img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])
    # Unpaired image folders for the two translation domains (A <-> B).
    self.trainA = ImageFolder(
        os.path.join('dataset', self.dataset, 'trainA'), train_transform)
    self.trainB = ImageFolder(
        os.path.join('dataset', self.dataset, 'trainB'), train_transform)
    self.testA = ImageFolder(
        os.path.join('dataset', self.dataset, 'testA'), test_transform)
    self.testB = ImageFolder(
        os.path.join('dataset', self.dataset, 'testB'), test_transform)
    self.trainA_loader = DataLoader(self.trainA,
                                    batch_size=self.batch_size,
                                    shuffle=True)
    self.trainB_loader = DataLoader(self.trainB,
                                    batch_size=self.batch_size,
                                    shuffle=True)
    self.testA_loader = DataLoader(self.testA, batch_size=1, shuffle=False)
    self.testB_loader = DataLoader(self.testB, batch_size=1, shuffle=False)
    """ Define Generator, Discriminator """
    self.genA2B = ResnetGenerator(input_nc=3,
                                  output_nc=3,
                                  ngf=self.ch,
                                  n_blocks=self.n_res,
                                  img_size=self.img_size,
                                  light=self.light)
    self.genB2A = ResnetGenerator(input_nc=3,
                                  output_nc=3,
                                  ngf=self.ch,
                                  n_blocks=self.n_res,
                                  img_size=self.img_size,
                                  light=self.light)
    # Global (deeper) and local (shallower) discriminators per domain.
    self.disGA = Discriminator(input_nc=3, ndf=self.ch, n_layers=7)
    self.disGB = Discriminator(input_nc=3, ndf=self.ch, n_layers=7)
    self.disLA = Discriminator(input_nc=3, ndf=self.ch, n_layers=5)
    self.disLB = Discriminator(input_nc=3, ndf=self.ch, n_layers=5)
    """ Define Loss """
    self.L1_loss = loss.L1Loss()
    self.MSE_loss = loss.MSELoss()
    self.BCE_loss = loss.BCEWithLogitsLoss()
    """ Trainer """
    def get_params(block):
        # Collect trainable parameters, skipping instance-norm parameters
        # and weight_u / weight_v tensors (presumably spectral-norm
        # auxiliaries — confirm against the layer definitions).
        out = []
        for name, param in block.named_parameters():
            if 'instancenorm' in name or 'weight_u' in name or 'weight_v' in name:
                continue
            out.append(param)
        return out
    genA2B_parameters = get_params(self.genA2B)
    genB2A_parameters = get_params(self.genB2A)
    disGA_parameters = get_params(self.disGA)
    disGB_parameters = get_params(self.disGB)
    disLA_parameters = get_params(self.disLA)
    disLB_parameters = get_params(self.disLB)
    # One optimizer for both generators, one for all four discriminators.
    G_parameters = genA2B_parameters + genB2A_parameters
    D_parameters = disGA_parameters + disGB_parameters + disLA_parameters + disLB_parameters
    self.G_optim = fluid.optimizer.Adam(
        parameter_list=G_parameters,
        learning_rate=self.lr,
        beta1=0.5,
        beta2=0.999,
        regularization=fluid.regularizer.L2Decay(self.weight_decay))
    self.D_optim = fluid.optimizer.Adam(
        parameter_list=D_parameters,
        learning_rate=self.lr,
        beta1=0.5,
        beta2=0.999,
        regularization=fluid.regularizer.L2Decay(self.weight_decay))
    """ Define Rho clipper to constraint the value of rho in AdaILN and ILN"""
fin.write(model_path + ' ' + str(epoch) + '\n') #dataset prepare #--------------------------------- print('Loading dataset...') cache_size = 256 if args.image_size == 448: cache_size = 256 * 2 if args.image_size == 352: cache_size = 402 transform_train = transforms.Compose([ transforms.Resize((cache_size,cache_size)), #transforms.Resize((args.image_size,args.image_size)), #transforms.RandomRotation(10), transforms.RandomCrop(args.image_size), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize([0.485,0.456,0.406], [0.229,0.224,0.225]), ]) transform_test = transforms.Compose([ transforms.Resize((cache_size,cache_size)), transforms.CenterCrop(args.image_size), transforms.ToTensor(), transforms.Normalize([0.485,0.456,0.406], [0.229,0.224,0.225]), ]) print("Dataset Initializing...") trainset = SRDataset.SRDataset(max_person=args.max_person,image_dir=args.images_root, \
def generate_dataloader(model, eval_type, args):
    """Extract (or load cached) RoI and union-box features for a split, then
    wrap them in an SRDataset DataLoader.

    Args:
        model: feature extractor called as ``model(img, image_bboxes)``.
        eval_type: one of 'train', 'valid', or anything else (treated as test).
        args: parsed CLI namespace (image_size, file prefixes, batch sizes,
            cuda flags, etc.).

    Returns:
        ``(dataloader, class_weight, class_count)`` — the weights/counts are
        only populated for the 'train' split, otherwise empty lists.
    """
    # Pre-crop cache size depends on the final input resolution.
    cache_size = 256
    if args.image_size == 448:
        cache_size = 256 * 2
    if args.image_size == 352:
        cache_size = 402
    transform_train = transforms.Compose([
        transforms.Resize((cache_size, cache_size)),
        transforms.RandomCrop(args.image_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    transform_test = transforms.Compose([
        transforms.Resize((cache_size, cache_size)),
        transforms.CenterCrop(args.image_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    # Raw image/bbox dataset used only for the feature-extraction pass.
    if eval_type == 'train':
        data = ImageDataset(max_person=args.max_person + 1,
                            image_dir=args.images_root,
                            images_list=args.train_file_pre + '_images.txt',
                            bboxes_list=args.train_file_pre + '_bbox.json',
                            image_size=args.image_size,
                            input_transform=transform_train)
    elif eval_type == 'valid':
        data = ImageDataset(max_person=args.max_person + 1,
                            image_dir=args.images_root,
                            images_list=args.valid_file_pre + '_images.txt',
                            bboxes_list=args.valid_file_pre + '_bbox.json',
                            image_size=args.image_size,
                            input_transform=transform_test)
    else:
        data = ImageDataset(max_person=args.max_person + 1,
                            image_dir=args.images_root,
                            images_list=args.test_file_pre + '_images.txt',
                            bboxes_list=args.test_file_pre + '_bbox.json',
                            image_size=args.image_size,
                            input_transform=transform_test)
    # NOTE(review): np.random.seed(...) is CALLED here and returns None, so
    # DataLoader actually receives worker_init_fn=None — the global NumPy
    # RNG is seeded once at construction time instead of once per worker.
    # Confirm whether per-worker seeding was intended.
    loader = torch.utils.data.DataLoader(data,
                                         batch_size=args.test_batch_size,
                                         shuffle=False,
                                         num_workers=0,
                                         worker_init_fn=np.random.seed(
                                             args.manualSeed))
    model.eval()
    batch_time = AverageMeter.AverageMeter()
    end_time = time.time()
    # Cached feature files, keyed by dataset name and split.
    union_filename = './para_data/' + args.dataset + '_' + eval_type + '_union' + '.npy'
    feat_filename = './para_data/' + args.dataset + '_' + eval_type + '_ssl.npy'
    ssl_model = load_ssl_model()
    ssl_model.eval()
    if args.cuda:
        ssl_model.cuda()
        ssl_model = torch.nn.DataParallel(ssl_model)
    if os.path.exists(feat_filename) and os.path.exists(
            union_filename) and not args.regenerate_roifeat:
        # Fast path: reuse previously extracted features from disk.
        all_feat = np.load(feat_filename)
        logger.info('loadding RoI feature npy from {} successfully'.format(
            feat_filename))
        all_union_feat = np.load(union_filename,
                                 mmap_mode='r')  # [B, N, N, 2048]
        logger.info(
            'loading union npy from {} successfully'.format(union_filename))
    else:
        # Slow path: run the extractor over the whole split and cache results.
        all_feat, all_union_feat = [], []
        for batch_idx, (img, image_bboxes) in enumerate(loader):
            if args.cuda:
                img, image_bboxes = img.cuda(), image_bboxes.cuda()
            img, image_bboxes = Variable(img), Variable(image_bboxes)
            node_num = image_bboxes.shape[1]
            # Append pairwise union boxes after the per-person boxes so one
            # forward pass yields both kinds of features.
            union_boxes = get_union_box(image_bboxes)
            if args.cuda:
                union_boxes = union_boxes.cuda()
            image_bboxes = torch.cat((image_bboxes, union_boxes), axis=1)
            del union_boxes
            image_bboxes = Variable(image_bboxes)
            # [batcn, node_num, 2048]
            rois_feature_all = model(img, image_bboxes)
            feature_num = rois_feature_all.shape[2]
            # Split back into per-box and (N x N) union features.
            rois_feature = rois_feature_all[:, :node_num, :]
            union_feature = rois_feature_all[:, node_num:, :].reshape(
                -1, node_num, node_num, feature_num)
            if args.load_ssl_model:
                img_feat = ssl_model(
                    img, image_bboxes)  # [batch_size, max_person+1, feat_dim]
                # Append the SSL model's last (global) feature slot.
                rois_feature = torch.cat((rois_feature, img_feat[:, -1:, :]),
                                         dim=1)
            all_feat.append(rois_feature.cpu().data.numpy())
            all_union_feat.append(union_feature.cpu().data.numpy())
            batch_time.update(time.time() - end_time)
            end_time = time.time()
            if batch_idx % args.print_freq == 0:
                logger.info('%s Epoch: [%d/%d] '
                            'Time %.3f (%.3f)\t' %
                            (eval_type, batch_idx, len(loader),
                             batch_time.val, batch_time.avg))
        all_feat = np.concatenate(all_feat)
        all_union_feat = np.concatenate(all_union_feat)
        np.save(feat_filename, all_feat)
        np.save(union_filename, all_union_feat)
    # Build the feature-level relation dataset; class weights only for train.
    class_weight, class_count = [], []
    if eval_type == 'train':
        dataset = SRDataset(all_feat,
                            all_union_feat,
                            max_person=args.max_person + 1,
                            image_dir=args.images_root,
                            images_list=args.train_file_pre + '_images.txt',
                            relations_list=args.train_file_pre + '_relation.json',
                            image_size=args.image_size)
        class_weight, class_count = dataset.class_weight()
        batch_size = args.batch_size
        is_shuffle = True
    elif eval_type == 'valid':
        dataset = SRDataset(all_feat,
                            all_union_feat,
                            max_person=args.max_person + 1,
                            image_dir=args.images_root,
                            images_list=args.valid_file_pre + '_images.txt',
                            relations_list=args.valid_file_pre + '_relation.json',
                            image_size=args.image_size)
        batch_size = args.test_batch_size
        is_shuffle = False
    else:
        dataset = SRDataset(all_feat,
                            all_union_feat,
                            max_person=args.max_person + 1,
                            image_dir=args.images_root,
                            images_list=args.test_file_pre + '_images.txt',
                            relations_list=args.test_file_pre + '_relation.json',
                            image_size=args.image_size)
        batch_size = args.test_batch_size
        is_shuffle = False
    # NOTE(review): same worker_init_fn caveat as above.
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=is_shuffle,
                                             num_workers=args.num_workers,
                                             worker_init_fn=np.random.seed(
                                                 args.manualSeed))
    return dataloader, class_weight, class_count
dataset_root_path_1 = "" dataset_root_path_2 = "" split = "train" percentage = 15 NUM_WORKERS = 0 BATCH_SIZE = 4 TGT_IMAGES_IN_BATCH = 1 DEVICE = "cuda" DATASET_NAME_1 = "" DATASET_NAME_2 = "" transforms = None if split == "train": transforms = custom_T.Compose([ custom_T.RandomHorizontalFlip(), custom_T.RandomCrop(), custom_T.ToTensor(), custom_T.FasterRCNNResizer() ]) elif split == "val" or split == "test": transforms = custom_T.Compose( [custom_T.ToTensor(), custom_T.FasterRCNNResizer()]) dataset_1 = CustomYoloAnnotatedDataset(dataset_root_path_1, transforms=transforms, dataset_name=DATASET_NAME_1, percentage=percentage, split=split) dataset_2 = CustomYoloAnnotatedDataset(dataset_root_path_2, transforms=transforms,