def generate_dataset_in_random_mode(self, n, description_file, seed=0):
    set_random_seed(seed)
    description = read_json_file(description_file)

    self.synthetic_dataset = DataFrame()
    for attr in description['attribute_description'].keys():
        attr_info = description['attribute_description'][attr]
        datatype = attr_info['data_type']
        is_categorical = attr_info['is_categorical']
        is_candidate_key = attr_info['is_candidate_key']
        minimum = attr_info['min']
        maximum = attr_info['max']
        # a constant attribute (min == max) is generated with that single value
        static_num = attr_info['min'] if minimum == maximum else None
        if is_candidate_key:
            self.synthetic_dataset[attr] = parse_json(attr_info).generate_values_as_candidate_key(n)
        elif is_categorical:
            self.synthetic_dataset[attr] = random.choice(attr_info['distribution_bins'], n)
        elif datatype == 'String':
            length = static_num or random.randint(minimum, maximum)
            self.synthetic_dataset[attr] = length
            self.synthetic_dataset[attr] = self.synthetic_dataset[attr].map(lambda x: generate_random_string(x))
        else:
            if datatype == 'Integer':
                self.synthetic_dataset[attr] = static_num or random.randint(minimum, maximum + 1, n)
            else:
                self.synthetic_dataset[attr] = static_num or random.uniform(minimum, maximum, n)
def generate_dataset_in_correlated_attribute_mode(self, n, description_file, seed=0):
    set_random_seed(seed)
    self.n = n
    self.description = read_json_file(description_file)
    all_attributes = self.description['meta']['all_attributes']
    candidate_keys = set(self.description['meta']['candidate_keys'])
    self.encoded_dataset = DataGenerator.generate_encoded_dataset(self.n, self.description)

    self.synthetic_dataset = DataFrame(columns=all_attributes)
    for attr in all_attributes:
        attr_info = self.description['attribute_description'][attr]
        column = parse_json(attr_info)
        if attr in self.encoded_dataset:
            self.synthetic_dataset[attr] = column.sample_values_from_binning_indices(self.encoded_dataset[attr])
        elif attr in candidate_keys:
            self.synthetic_dataset[attr] = column.generate_values_as_candidate_key(n)
        else:
            # for attributes not in BN or candidate keys, use independent attribute mode.
            binning_indices = column.sample_binning_indices_in_independent_attribute_mode(n)
            self.synthetic_dataset[attr] = column.sample_values_from_binning_indices(binning_indices)
def main():
    args = parse_args()
    cfg = Config.fromfile(args.config)
    # cudnn benchmark defaults to True
    if cfg.get('cudnn_benchmark', True):
        torch.backends.cudnn.benchmark = True
    if args.validate:
        cfg.validate = args.validate
    # override config options from command-line args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.load_checkpoint = args.resume_from

    # init distributed training
    if args.launcher == 'none':
        raise NotImplementedError
    else:
        # free_port = find_free_port()
        # dist_url = f'tcp://127.0.0.1:{free_port}'  # only used in pytorch dist mode
        init_dist(args.launcher)

    # init logger before other steps
    logger = get_root_logger(cfg.work_dir, cfg.log_level)
    logger.info(f'Distributed training with {args.world_size}')

    # set random seeds
    rank = args.local_rank
    if args.seed is not None:
        set_random_seed(args.seed + rank)

    if cfg.validate:
        # validate-only mode: a checkpoint to evaluate must be provided
        final_validate_checkpoint = cfg.load_checkpoint
        assert final_validate_checkpoint is not None
        engine = Engine(cfg, logger, only_final_validate=True)
    else:
        engine = Engine(cfg, logger, only_final_validate=False)
        if cfg.load_model is not None:
            # finetuning from a pretrained model
            engine.load_pretrained(cfg.load_model)
        if cfg.load_checkpoint is not None:
            engine.load_modelandstatus(cfg.load_checkpoint)
        engine.run()
        final_validate_checkpoint = os.path.join(cfg.work_dir, 'model_best.pth.tar')

    logger.info('doing the final validation')
    engine.load_modelandstatus(final_validate_checkpoint)
    engine.validate_epoch(final_validate=True)
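# Hedged sketch, not part of the original source: a minimal parse_args()
# consistent with the attributes that main() above reads (config, validate,
# work_dir, resume_from, launcher, seed, local_rank, world_size). The actual
# project may define different defaults and additional options; the launcher
# choices below are assumptions.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='Train or validate a model')
    parser.add_argument('config', help='path to the config file')
    parser.add_argument('--work_dir', default=None, help='directory for logs and checkpoints')
    parser.add_argument('--resume_from', default=None, help='checkpoint to resume from')
    parser.add_argument('--validate', action='store_true', help='run validation only')
    parser.add_argument('--launcher', default='none', choices=['none', 'pytorch', 'slurm'])
    parser.add_argument('--seed', type=int, default=None)
    parser.add_argument('--local_rank', type=int, default=0)
    parser.add_argument('--world_size', type=int, default=1)
    return parser.parse_args()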
def describe_dataset_in_random_mode(
        self,
        dataset_file: str,
        attribute_to_datatype: Dict[str, DataType] = None,
        attribute_to_is_categorical: Dict[str, bool] = None,
        attribute_to_is_candidate_key: Dict[str, bool] = None,
        categorical_attribute_domain_file: str = None,
        numerical_attribute_ranges: Dict[str, List] = None,
        seed=0):
    attribute_to_datatype = attribute_to_datatype or {}
    attribute_to_is_categorical = attribute_to_is_categorical or {}
    attribute_to_is_candidate_key = attribute_to_is_candidate_key or {}
    numerical_attribute_ranges = numerical_attribute_ranges or {}
    if categorical_attribute_domain_file:
        categorical_attribute_to_domain = utils.read_json_file(categorical_attribute_domain_file)
    else:
        categorical_attribute_to_domain = {}

    utils.set_random_seed(seed)
    self.attr_to_datatype = {attr: DataType(datatype) for attr, datatype in attribute_to_datatype.items()}
    self.attr_to_is_categorical = attribute_to_is_categorical
    self.attr_to_is_candidate_key = attribute_to_is_candidate_key
    self.read_dataset_from_csv(dataset_file)
    self.infer_attribute_data_types()
    self.analyze_dataset_meta()
    self.represent_input_dataset_by_columns()

    for column in self.attr_to_column.values():
        attr_name = column.name
        if attr_name in categorical_attribute_to_domain:
            column.infer_domain(categorical_domain=categorical_attribute_to_domain[attr_name])
        elif attr_name in numerical_attribute_ranges:
            column.infer_domain(numerical_range=numerical_attribute_ranges[attr_name])
        else:
            column.infer_domain()

    # record attribute information in json format
    self.data_description['attribute_description'] = {}
    for attr, column in self.attr_to_column.items():
        self.data_description['attribute_description'][attr] = column.to_json()
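# Hedged usage sketch, not part of the original source: it shows how the
# method above might be called on a describer object and how its
# data_description output could be written to JSON. The class name
# DataDescriber, the CSV path and the column names below are assumptions;
# only the method signature and the data_description attribute come from
# the code above.
import json

describer = DataDescriber()  # hypothetical class holding the method above
describer.describe_dataset_in_random_mode(
    'input_data.csv',                               # hypothetical input path
    attribute_to_is_candidate_key={'id': True},     # hypothetical column
    numerical_attribute_ranges={'age': [0, 100]},   # hypothetical column
    seed=0)
with open('description.json', 'w') as f:            # hypothetical output file
    json.dump(describer.data_description, f, indent=4)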
def generate_dataset_in_independent_mode(self, n, description_file, seed=0):
    set_random_seed(seed)
    self.description = read_json_file(description_file)
    all_attributes = self.description['meta']['all_attributes']
    candidate_keys = set(self.description['meta']['candidate_keys'])

    self.synthetic_dataset = DataFrame(columns=all_attributes)
    for attr in all_attributes:
        attr_info = self.description['attribute_description'][attr]
        column = parse_json(attr_info)
        if attr in candidate_keys:
            self.synthetic_dataset[attr] = column.generate_values_as_candidate_key(n)
        else:
            binning_indices = column.sample_binning_indices_in_independent_attribute_mode(n)
            self.synthetic_dataset[attr] = column.sample_values_from_binning_indices(binning_indices)
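# Hedged usage sketch, not part of the original source: it drives the three
# generation modes defined above from one description file. The class name
# DataGenerator is taken from the generate_encoded_dataset call in the
# correlated-attribute mode; the description filename and the output CSV
# paths are assumptions.
generator = DataGenerator()

generator.generate_dataset_in_random_mode(1000, 'description.json', seed=0)
generator.synthetic_dataset.to_csv('synthetic_random.csv', index=False)

generator.generate_dataset_in_independent_mode(1000, 'description.json', seed=0)
generator.synthetic_dataset.to_csv('synthetic_independent.csv', index=False)

generator.generate_dataset_in_correlated_attribute_mode(1000, 'description.json', seed=0)
generator.synthetic_dataset.to_csv('synthetic_correlated.csv', index=False)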
def main():
    args = parse_args()
    cfg = Config.fromfile(args.config)
    # cudnn benchmark defaults to True
    if cfg.get('cudnn_benchmark', True):
        torch.backends.cudnn.benchmark = True
    if args.validate:
        cfg.validate = args.validate
    # override config options from command-line args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.load_checkpoint = args.resume_from

    # init distributed training
    if args.launcher == 'none':
        raise NotImplementedError
    else:
        init_dist(args.launcher)

    # init logger before other steps
    logger = get_root_logger(cfg.work_dir, cfg.log_level)
    logger.info(f'Distributed training with {args.world_size}')

    # set random seeds
    rank = args.local_rank
    if args.seed is not None:
        set_random_seed(args.seed + rank)

    engine = Engine(cfg, logger)
    if cfg.load_model is not None:
        engine.load_model(cfg.load_model)
    if cfg.load_checkpoint is not None:
        engine.load_modelandstatus(cfg.load_checkpoint)
    engine.run()
p.add_argument('--downsampling_filters', nargs='+', default=['box'])
p.add_argument('--resize_blur_min', type=float, default=0.95)
p.add_argument('--resize_blur_max', type=float, default=1.05)
p.add_argument('--epoch', '-e', type=int, default=50)
p.add_argument('--inner_epoch', type=int, default=4)
p.add_argument('--finetune', '-f', default=None)
p.add_argument('--model_name', default=None)
args = p.parse_args()

if args.arch in srcnn.table:
    args.arch = srcnn.table[args.arch]

warnings.filterwarnings('ignore')


if __name__ == '__main__':
    utils.set_random_seed(args.seed, args.gpu)

    if args.color == 'y':
        ch = 1
        weight = (1.0,)
    elif args.color == 'rgb':
        ch = 3
        weight = (0.29891 * 3, 0.58661 * 3, 0.11448 * 3)
    weight = np.array(weight, dtype=np.float32)
    weight = weight[:, np.newaxis, np.newaxis]

    print('* loading filelist...', end=' ')
    filelist = utils.load_filelist(args.dataset_dir, shuffle=True)
    valid_num = int(np.ceil(args.validation_rate * len(filelist)))
    valid_list, train_list = filelist[:valid_num], filelist[valid_num:]
    print('done')
def main():
    p = argparse.ArgumentParser(description='Chainer implementation of waifu2x')
    p.add_argument('--gpu', '-g', type=int, default=-1)
    p.add_argument('--seed', '-s', type=int, default=11)
    p.add_argument('--dataset_dir', '-d', required=True)
    p.add_argument('--validation_rate', type=float, default=0.05)
    p.add_argument('--nr_rate', type=float, default=0.65)
    p.add_argument('--chroma_subsampling_rate', type=float, default=0.5)
    p.add_argument('--reduce_memory_usage', action='store_true')
    p.add_argument('--out_size', type=int, default=64)
    p.add_argument('--max_size', type=int, default=256)
    p.add_argument('--active_cropping_rate', type=float, default=0.5)
    p.add_argument('--active_cropping_tries', type=int, default=10)
    p.add_argument('--random_half_rate', type=float, default=0.0)
    p.add_argument('--random_color_noise_rate', type=float, default=0.0)
    p.add_argument('--random_unsharp_mask_rate', type=float, default=0.0)
    p.add_argument('--learning_rate', type=float, default=0.00025)
    p.add_argument('--lr_min', type=float, default=0.00001)
    p.add_argument('--lr_decay', type=float, default=0.9)
    p.add_argument('--lr_decay_interval', type=int, default=5)
    p.add_argument('--batch_size', '-b', type=int, default=16)
    p.add_argument('--patches', '-p', type=int, default=64)
    p.add_argument('--validation_crop_rate', type=float, default=0.5)
    p.add_argument('--downsampling_filters', nargs='+', default=['box'])
    p.add_argument('--resize_blur_min', type=float, default=0.95)
    p.add_argument('--resize_blur_max', type=float, default=1.05)
    p.add_argument('--epoch', '-e', type=int, default=50)
    p.add_argument('--inner_epoch', type=int, default=4)
    p.add_argument('--finetune', '-f', default=None)
    p.add_argument('--model_name', default=None)
    p.add_argument('--color', '-c', default='rgb', choices=['y', 'rgb'])
    p.add_argument('--arch', '-a', default='VGG7',
                   choices=['VGG7', '0', 'UpConv7', '1', 'ResNet10', '2', 'UpResNet10', '3'])
    p.add_argument('--method', '-m', default='scale', choices=['noise', 'scale', 'noise_scale'])
    p.add_argument('--noise_level', '-n', type=int, default=1, choices=[0, 1, 2, 3])
    args = p.parse_args()
    if args.arch in srcnn.table:
        args.arch = srcnn.table[args.arch]

    utils.set_random_seed(args.seed, args.gpu)

    if args.color == 'y':
        ch = 1
        weight = (1.0,)
    elif args.color == 'rgb':
        ch = 3
        weight = (0.29891 * 3, 0.58661 * 3, 0.11448 * 3)
    weight = np.array(weight, dtype=np.float32)
    weight = weight[:, np.newaxis, np.newaxis]

    print('* loading filelist...', end=' ')
    filelist = utils.load_filelist(args.dataset_dir, shuffle=True)
    valid_num = int(np.ceil(args.validation_rate * len(filelist)))
    valid_list, train_list = filelist[:valid_num], filelist[valid_num:]
    print('done')

    print('* setup model...', end=' ')
    if args.model_name is None:
        if args.method == 'noise':
            model_name = 'anime_style_noise{}'.format(args.noise_level)
        elif args.method == 'scale':
            model_name = 'anime_style_scale'
        elif args.method == 'noise_scale':
            model_name = 'anime_style_noise{}_scale'.format(args.noise_level)
        model_path = '{}_{}.npz'.format(model_name, args.color)
    else:
        # drop a trailing '.npz' extension if present (str.rstrip('.npz') would
        # also strip trailing 'n', 'p' and 'z' characters from the name itself)
        model_name = args.model_name
        if model_name.endswith('.npz'):
            model_name = model_name[:-len('.npz')]
        model_path = model_name + '.npz'
    if not os.path.exists('epoch'):
        os.makedirs('epoch')

    model = srcnn.archs[args.arch](ch)
    if model.offset % model.inner_scale != 0:
        raise ValueError('offset %% inner_scale must be 0.')
    elif model.inner_scale != 1 and model.inner_scale % 2 != 0:
        raise ValueError('inner_scale must be 1 or an even number.')
    if args.finetune is not None:
        chainer.serializers.load_npz(args.finetune, model)

    if args.gpu >= 0:
        chainer.backends.cuda.check_cuda_available()
        chainer.backends.cuda.get_device(args.gpu).use()
        weight = chainer.backends.cuda.to_gpu(weight)
        model.to_gpu()

    optimizer = optimizers.Adam(alpha=args.learning_rate)
    optimizer.setup(model)
    print('done')

    valid_config = utils.get_config(args, model, train=False)
    train_config = utils.get_config(args, model, train=True)

    print('* check forward path...', end=' ')
    di = train_config.in_size
    do = train_config.out_size
    dx = model.xp.zeros((args.batch_size, ch, di, di), dtype=np.float32)
    dy = model(dx)
    if dy.shape[2:] != (do, do):
        raise ValueError('Invalid output size\n'
                         'Expect: ({}, {})\n'
                         'Actual: {}'.format(do, do, dy.shape[2:]))
    print('done')

    print('* starting processes of dataset sampler...', end=' ')
    valid_queue = DatasetSampler(valid_list, valid_config)
    train_queue = DatasetSampler(train_list, train_config)
    print('done')

    best_count = 0
    best_score = 0
    best_loss = np.inf
    for epoch in range(0, args.epoch):
        print('### epoch: {} ###'.format(epoch))
        train_queue.reload_switch(init=(epoch < args.epoch - 1))
        for inner_epoch in range(0, args.inner_epoch):
            best_count += 1
            print('  # inner epoch: {}'.format(inner_epoch))
            start = time.time()
            train_loss = train_inner_epoch(
                model, weight, optimizer, train_queue, args.batch_size)
            if args.reduce_memory_usage:
                train_queue.wait()
            if train_loss < best_loss:
                best_loss = train_loss
                print('  * best loss on training dataset: {:.6f}'.format(train_loss))
            valid_score = valid_inner_epoch(model, valid_queue, args.batch_size)
            if valid_score > best_score:
                best_count = 0
                best_score = valid_score
                print('  * best score on validation dataset: PSNR {:.6f} dB'.format(valid_score))
                best_model = model.copy().to_cpu()
                epoch_path = 'epoch/{}_epoch{}.npz'.format(model_name, epoch)
                chainer.serializers.save_npz(model_path, best_model)
                shutil.copy(model_path, epoch_path)
            if best_count >= args.lr_decay_interval:
                best_count = 0
                optimizer.alpha *= args.lr_decay
                if optimizer.alpha < args.lr_min:
                    optimizer.alpha = args.lr_min
                else:
                    print('  * learning rate decay: {:.6f}'.format(optimizer.alpha))
            print('  * elapsed time: {:.6f} sec'.format(time.time() - start))
def main():
    p = argparse.ArgumentParser()
    p.add_argument('--gpu', '-g', type=int, default=-1)
    p.add_argument('--input', '-i', default='../images/original.png')
    p.add_argument('--arch', '-a', default='')
    p.add_argument('--tta', '-t', action='store_true')
    p.add_argument('--batch_size', '-b', type=int, default=16)
    p.add_argument('--block_size', '-l', type=int, default=128)
    p.add_argument('--chroma_subsampling', '-j', action='store_true')
    p.add_argument('--downsampling_filter', '-d', default='box')
    p.add_argument('--method', '-m', default='scale', choices=['scale', 'noise_scale'])
    p.add_argument('--noise_level', '-n', type=int, default=1, choices=[0, 1])
    p.add_argument('--color', '-c', default='rgb', choices=['y', 'rgb'])
    p.add_argument('--tta_level', '-T', type=int, default=8, choices=[2, 4, 8])
    args = p.parse_args()
    if args.arch in srcnn.table:
        args.arch = srcnn.table[args.arch]

    utils.set_random_seed(0, args.gpu)

    if os.path.isdir(args.input):
        filelist = utils.load_filelist(args.input)
    else:
        filelist = [args.input]

    images = []
    for path in filelist:
        basename = os.path.basename(path)
        _, ext = os.path.splitext(basename)
        if ext.lower() in ['.png', '.bmp', '.tif', '.tiff']:
            img = Image.open(path).convert('RGB')
            w, h = img.size[:2]
            img = img.crop((0, 0, w - (w % 2), h - (h % 2)))
            images.append(img)

    qualities = [50, 60, 70, 80, 90, 100]
    sampling_factor = '1x1,1x1,1x1'
    if args.chroma_subsampling:
        sampling_factor = '2x2,1x1,1x1'

    arch_scores = {}
    for arch in srcnn.table.values():
        args.arch = arch
        models = load_models(args)
        scores = []
        sems = []
        for quality in qualities:
            print(arch, quality)
            start = time.time()
            score, sem = benchmark(args, models, images, sampling_factor, quality)
            scores.append(score)
            sems.append(sem)
            print('Elapsed time: {:.6f} sec'.format(time.time() - start))
        arch_scores[arch] = [scores, sems]

    plt.rcParams['xtick.direction'] = 'out'
    plt.rcParams['ytick.direction'] = 'out'
    plt.rcParams['font.size'] = 12
    plt.rcParams['legend.fontsize'] = 12

    title = 'scale ({}) '.format(args.downsampling_filter)
    title += 'noise{} ({}) '.format(args.noise_level, sampling_factor) if 'noise' in args.method else ''
    title += 'tta{}'.format(args.tta_level) if args.tta else ''
    plt.title(title)
    plt.xlabel('JPEG quality')
    plt.ylabel('PSNR [dB]')
    plt.ylim(27.5, 42)
    if args.method == 'scale':
        plt.xticks([50, 60, 70, 80, 90, 100], [50, 60, 70, 80, 90, 'lossless'])
    else:
        plt.xticks([50, 60, 70, 80, 90, 100])
    plt.yticks([30, 35, 40])
    plt.gca().yaxis.set_minor_locator(tick.MultipleLocator(2.5))
    if args.method == 'noise_scale':
        if args.noise_level == 0:
            plt.axvspan(85, 100, color='b', alpha=0.1, lw=0)
        elif args.noise_level == 1:
            plt.axvspan(65, 90, color='b', alpha=0.1, lw=0)
    plt.grid(which='both', color='gray', linestyle='--')
    plt.gca().spines['right'].set_visible(False)
    plt.gca().spines['top'].set_visible(False)
    plt.gca().yaxis.set_ticks_position('left')
    plt.gca().xaxis.set_ticks_position('bottom')
    for key, value in arch_scores.items():
        plt.errorbar(qualities, value[0], yerr=value[1], fmt='o-', capsize=3, label=key)
    plt.legend(loc='upper left', edgecolor='white')
    plt.show()
def main():
    global args

    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available() and not args.use_cpu
    log_name = 'test.log' if args.evaluate else 'train.log'
    log_name += time.strftime('-%Y-%m-%d-%H-%M-%S')
    sys.stdout = Logger(osp.join(args.save_dir, log_name))

    print('** Arguments **')
    arg_keys = list(args.__dict__.keys())
    arg_keys.sort()
    for key in arg_keys:
        print('{}: {}'.format(key, args.__dict__[key]))
    print('\n')
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))

    if use_gpu:
        torch.backends.cudnn.benchmark = True
        # torch.backends.cudnn.benchmark = False
        # torch.backends.cudnn.deterministic = True
    else:
        warnings.warn('Currently using CPU, however, GPU is highly recommended')

    # load data related args
    data_args = imagedata_kwargs(args)

    # initialize dataset
    dataset = init_image_dataset(name=data_args['source'], **data_args)

    # build data transformer
    transforms_tr, transforms_te = build_transforms(**data_args)

    # load train data
    trainset = dataset.train
    train_sampler = build_train_sampler(
        trainset, data_args['train_sampler'],
        batch_size=data_args['batch_size'],
        num_instances=data_args['num_instances'],
        num_train_pids=dataset.num_train_pids
    )
    trainloader = torch.utils.data.DataLoader(
        DataWarpper(data=trainset, transforms=transforms_tr),
        sampler=train_sampler,
        batch_size=data_args['batch_size'],
        shuffle=False,
        num_workers=data_args['workers'],
        pin_memory=False,
        drop_last=True,
    )

    # load test data
    queryset = dataset.query
    queryloader = torch.utils.data.DataLoader(
        DataWarpper(data=queryset, transforms=transforms_te),
        batch_size=data_args['batch_size'],
        shuffle=False,
        num_workers=data_args['workers'],
        pin_memory=False,
        drop_last=False
    )
    galleryset = dataset.gallery
    galleryloader = torch.utils.data.DataLoader(
        DataWarpper(data=galleryset, transforms=transforms_te),
        batch_size=data_args['batch_size'],
        shuffle=False,
        num_workers=data_args['workers'],
        pin_memory=False,
        drop_last=False
    )

    print('Building model: {}'.format(args.arch))
    model = build_model(
        name=args.arch,
        num_classes=dataset.num_train_pids,
        pretrained=(not args.no_pretrained),
        use_gpu=use_gpu,
        batch_size=args.batch_size,
        part_num=args.part_num,
        part_weight=args.part_weight
    )
    model = model.cuda()
    # num_params, flops = compute_model_complexity(model, (1, 3, args.height, args.width))
    # print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))

    if args.load_weights and check_isfile(args.load_weights):
        load_pretrained_weights(model, args.load_weights)

    optimizer = build_optimizer(model, **optimizer_kwargs(args))
    scheduler = build_lr_scheduler(optimizer, **lr_scheduler_kwargs(args))

    model, optimizer = amp.initialize(model, optimizer, opt_level="O1",
                                      keep_batchnorm_fp32=None, loss_scale=None)
    if use_gpu:
        model = nn.DataParallel(model)

    if args.resume and check_isfile(args.resume):
        args.start_epoch = resume_from_checkpoint(args.resume, model, optimizer=optimizer)

    print('Building {}-engine for {}-reid'.format(args.loss, args.app))
    engine = Engine(trainloader, queryloader, galleryloader, model, optimizer, scheduler,
                    query=queryset, gallery=galleryset, use_gpu=use_gpu,
                    num_train_pids=dataset.num_train_pids, **engine_kwargs(args))
    engine.run(**engine_kwargs(args), use_gpu=use_gpu)
                    default=True, type=bool, help="Use xyz as features or not.")
parser.add_argument('-len', '--length', default=-1, type=int,
                    help="Number of partial samples for training and testing.")
parser.add_argument('-da', '--data_arg', default=False, action='store_true',
                    help="Whether to use data augmentation")

if __name__ == "__main__":
    utils.set_random_seed(2020)
    args = parser.parse_args()
    print(args)
    args.length = None if args.length == -1 else args.length

    trainset = dataset.ModelNetDataset(mode="train", length=args.length)
    trainloader = DataLoader(trainset, batch_size=args.batch_size,
                             shuffle=True, num_workers=8)
    testset = dataset.ModelNetDataset(mode="test", length=args.length)
    testloader = DataLoader(testset, batch_size=args.batch_size, shuffle=False,