Example #1
0
    def generate_dataset_in_random_mode(self, n, description_file, seed=0):
        set_random_seed(seed)
        description = read_json_file(description_file)

        self.synthetic_dataset = DataFrame()
        for attr in description['attribute_description'].keys():
            attr_info = description['attribute_description'][attr]
            datatype = attr_info['data_type']
            is_categorical = attr_info['is_categorical']
            is_candidate_key = attr_info['is_candidate_key']
            minimum = attr_info['min']
            maximum = attr_info['max']
            # when min == max the attribute is constant; remember that constant value
            static_num = minimum if minimum == maximum else None
            if is_candidate_key:
                # candidate keys must be unique, so they are generated explicitly
                self.synthetic_dataset[attr] = parse_json(
                    attr_info).generate_values_as_candidate_key(n)
            elif is_categorical:
                # draw n values uniformly from the recorded category bins
                self.synthetic_dataset[attr] = random.choice(
                    attr_info['distribution_bins'], n)
            elif datatype == 'String':
                # random strings whose length lies in [min, max]
                length = static_num if static_num is not None else random.randint(minimum, maximum)
                self.synthetic_dataset[attr] = length
                self.synthetic_dataset[attr] = self.synthetic_dataset[
                    attr].map(lambda x: generate_random_string(x))
            elif datatype == 'Integer':
                # test against None rather than `static_num or ...`, which misfires when the constant is 0
                self.synthetic_dataset[attr] = (
                    static_num if static_num is not None
                    else random.randint(minimum, maximum + 1, n))
            else:
                self.synthetic_dataset[attr] = (
                    static_num if static_num is not None
                    else random.uniform(minimum, maximum, n))
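A minimal usage sketch for the method above. The DataGenerator class name is an assumption (the snippet only shows the method); synthetic_dataset is the attribute the method itself populates, and to_csv is plain pandas.

    # hypothetical usage; the class name is an assumption, not confirmed by the snippet
    generator = DataGenerator()
    generator.generate_dataset_in_random_mode(n=1000, description_file='description.json', seed=42)
    # synthetic_dataset is the pandas DataFrame populated by the call above
    generator.synthetic_dataset.to_csv('synthetic_random.csv', index=False)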
Example #2
0
    def generate_dataset_in_correlated_attribute_mode(self,
                                                      n,
                                                      description_file,
                                                      seed=0):
        set_random_seed(seed)
        self.n = n
        self.description = read_json_file(description_file)

        all_attributes = self.description['meta']['all_attributes']
        candidate_keys = set(self.description['meta']['candidate_keys'])
        self.encoded_dataset = DataGenerator.generate_encoded_dataset(
            self.n, self.description)
        self.synthetic_dataset = DataFrame(columns=all_attributes)
        for attr in all_attributes:
            attr_info = self.description['attribute_description'][attr]
            column = parse_json(attr_info)

            if attr in self.encoded_dataset:
                # attribute was sampled by the Bayesian network: decode its binning indices into values
                self.synthetic_dataset[attr] = column.sample_values_from_binning_indices(
                    self.encoded_dataset[attr])
            elif attr in candidate_keys:
                self.synthetic_dataset[attr] = column.generate_values_as_candidate_key(n)
            else:
                # for attributes not in the BN and not candidate keys, fall back to independent attribute mode
                binning_indices = column.sample_binning_indices_in_independent_attribute_mode(n)
                self.synthetic_dataset[attr] = column.sample_values_from_binning_indices(binning_indices)
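Because the method seeds the RNG up front (set_random_seed(seed)), two runs with the same seed and description file should produce identical synthetic tables. A small sanity check, assuming the surrounding class can be instantiated as DataGenerator (the class name is an assumption):

    # hypothetical reproducibility check; only the method signature and synthetic_dataset come from the snippet
    g1, g2 = DataGenerator(), DataGenerator()
    g1.generate_dataset_in_correlated_attribute_mode(500, 'description.json', seed=7)
    g2.generate_dataset_in_correlated_attribute_mode(500, 'description.json', seed=7)
    assert g1.synthetic_dataset.equals(g2.synthetic_dataset)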
Example #3
0
def main():
    args = parse_args()
    cfg = Config.fromfile(args.config)

    # cuDNN benchmark defaults to True
    if cfg.get('cudnn_benchmark', True):
        torch.backends.cudnn.benchmark = True
    if args.validate:
        cfg.validate = args.validate

    # override by args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir

    if args.resume_from is not None:
        cfg.load_checkpoint = args.resume_from

    # init dist training
    if args.launcher == 'none':
        raise NotImplementedError('non-distributed training is not supported')
    else:
        # free_port = find_free_port()
        # dist_url = f'tcp://127.0.0.1:{free_port}'
        # only be used in pytorch dist mode
        init_dist(args.launcher)

    # init logger before other steps
    logger = get_root_logger(cfg.work_dir, cfg.log_level)
    logger.info(f'Distributed training, world size: {args.world_size}')

    # set random seeds
    rank = args.local_rank
    if args.seed is not None:
        set_random_seed(args.seed + rank)

    if cfg.validate:  # validate-only mode: skip training and run the final validation
        # a checkpoint to evaluate is required in this mode
        final_validate_checkpoint = cfg.load_checkpoint
        assert final_validate_checkpoint is not None, 'validate-only mode requires cfg.load_checkpoint (or --resume_from)'
        engine = Engine(cfg, logger, only_final_validate=True)
    else:
        engine = Engine(cfg, logger, only_final_validate=False)

        if cfg.load_model is not None:
            # fine-tuning from pretrained weights
            engine.load_pretrained(cfg.load_model)

        if cfg.load_checkpoint is not None:
            engine.load_modelandstatus(cfg.load_checkpoint)

        engine.run()
        final_validate_checkpoint = os.path.join(cfg.work_dir, 'model_best.pth.tar')

    logger.info('Running final validation')
    engine.load_modelandstatus(final_validate_checkpoint)
    engine.validate_epoch(final_validate=True)
Example #4
0
    def describe_dataset_in_random_mode(
            self,
            dataset_file: str,
            attribute_to_datatype: Dict[str, DataType] = None,
            attribute_to_is_categorical: Dict[str, bool] = None,
            attribute_to_is_candidate_key: Dict[str, bool] = None,
            categorical_attribute_domain_file: str = None,
            numerical_attribute_ranges: Dict[str, List] = None,
            seed=0):
        attribute_to_datatype = attribute_to_datatype or {}
        attribute_to_is_categorical = attribute_to_is_categorical or {}
        attribute_to_is_candidate_key = attribute_to_is_candidate_key or {}
        numerical_attribute_ranges = numerical_attribute_ranges or {}

        if categorical_attribute_domain_file:
            categorical_attribute_to_domain = utils.read_json_file(
                categorical_attribute_domain_file)
        else:
            categorical_attribute_to_domain = {}

        utils.set_random_seed(seed)
        self.attr_to_datatype = {
            attr: DataType(datatype)
            for attr, datatype in attribute_to_datatype.items()
        }
        self.attr_to_is_categorical = attribute_to_is_categorical
        self.attr_to_is_candidate_key = attribute_to_is_candidate_key
        self.read_dataset_from_csv(dataset_file)
        self.infer_attribute_data_types()
        self.analyze_dataset_meta()
        self.represent_input_dataset_by_columns()

        for column in self.attr_to_column.values():
            attr_name = column.name
            if attr_name in categorical_attribute_to_domain:
                column.infer_domain(
                    categorical_domain=categorical_attribute_to_domain[
                        attr_name])
            elif attr_name in numerical_attribute_ranges:
                column.infer_domain(
                    numerical_range=numerical_attribute_ranges[attr_name])
            else:
                column.infer_domain()

        # record attribute information in json format
        self.data_description['attribute_description'] = {}
        for attr, column in self.attr_to_column.items():
            self.data_description['attribute_description'][
                attr] = column.to_json()
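A usage sketch for the describer method above. The DataDescriber class name is an assumption inferred from the method; the JSON dump relies only on data_description, which the method populates.

    import json

    # hypothetical usage; the class name is an assumption
    describer = DataDescriber()
    describer.describe_dataset_in_random_mode('input.csv', seed=0)
    with open('description.json', 'w') as f:
        json.dump(describer.data_description, f, indent=4)  # data_description is filled in by the call above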
Example #5
0
    def generate_dataset_in_independent_mode(self, n, description_file, seed=0):
        set_random_seed(seed)
        self.description = read_json_file(description_file)

        all_attributes = self.description['meta']['all_attributes']
        candidate_keys = set(self.description['meta']['candidate_keys'])
        self.synthetic_dataset = DataFrame(columns=all_attributes)
        for attr in all_attributes:
            attr_info = self.description['attribute_description'][attr]
            column = parse_json(attr_info)

            if attr in candidate_keys:
                self.synthetic_dataset[attr] = column.generate_values_as_candidate_key(n)
            else:
                # each non-key attribute is sampled independently from its recorded distribution
                binning_indices = column.sample_binning_indices_in_independent_attribute_mode(n)
                self.synthetic_dataset[attr] = column.sample_values_from_binning_indices(binning_indices)
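The two-step sampling used above (first draw a bin index from the attribute's distribution, then materialize a value inside that bin) can be shown in isolation. This is a simplified illustration of the idea, not the column class's actual implementation:

    import numpy as np

    # simplified bin-then-value sampling; not the real column implementation
    bins = np.array([0, 10, 20, 30])        # bin edges for a numerical attribute
    probs = [0.2, 0.5, 0.3]                 # per-bin probabilities from the description file
    idx = np.random.choice(len(probs), size=5, p=probs)      # cf. sample_binning_indices_in_independent_attribute_mode
    values = np.random.uniform(bins[idx], bins[idx + 1])     # cf. sample_values_from_binning_indices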
Example #6
0
def main():
    args = parse_args()
    cfg = Config.fromfile(args.config)

    # cuDNN benchmark defaults to True
    if cfg.get('cudnn_benchmark', True):
        torch.backends.cudnn.benchmark = True
    if args.validate:
        cfg.validate = args.validate

    # override by args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir

    if args.resume_from is not None:
        cfg.load_checkpoint = args.resume_from

    # init dist training
    if args.launcher == 'none':
        raise NotImplementedError('non-distributed training is not supported')
    else:
        init_dist(args.launcher)

    # init logger before other steps
    logger = get_root_logger(cfg.work_dir, cfg.log_level)
    logger.info(f'Distributed training, world size: {args.world_size}')

    # set random seeds
    rank = args.local_rank
    if args.seed is not None:
        set_random_seed(args.seed + rank)

    engine = Engine(cfg, logger)

    if cfg.load_model is not None:
        engine.load_model(cfg.load_model)
    if cfg.load_checkpoint is not None:
        engine.load_modelandstatus(cfg.load_checkpoint)

    engine.run()
Example #7
0
p.add_argument('--downsampling_filters', nargs='+', default=['box'])
p.add_argument('--resize_blur_min', type=float, default=0.95)
p.add_argument('--resize_blur_max', type=float, default=1.05)
p.add_argument('--epoch', '-e', type=int, default=50)
p.add_argument('--inner_epoch', type=int, default=4)
p.add_argument('--finetune', '-f', default=None)
p.add_argument('--model_name', default=None)

args = p.parse_args()
if args.arch in srcnn.table:
    args.arch = srcnn.table[args.arch]


warnings.filterwarnings('ignore')
if __name__ == '__main__':
    utils.set_random_seed(args.seed, args.gpu)
    if args.color == 'y':
        ch = 1
        weight = (1.0,)
    elif args.color == 'rgb':
        ch = 3
        weight = (0.29891 * 3, 0.58661 * 3, 0.11448 * 3)
    weight = np.array(weight, dtype=np.float32)
    weight = weight[:, np.newaxis, np.newaxis]

    print('* loading filelist...', end=' ')
    filelist = utils.load_filelist(args.dataset_dir, shuffle=True)
    valid_num = int(np.ceil(args.validation_rate * len(filelist)))
    valid_list, train_list = filelist[:valid_num], filelist[valid_num:]
    print('done')
Example #8
0
def main():
    p = argparse.ArgumentParser(description='Chainer implementation of waifu2x')
    p.add_argument('--gpu', '-g', type=int, default=-1)
    p.add_argument('--seed', '-s', type=int, default=11)
    p.add_argument('--dataset_dir', '-d', required=True)
    p.add_argument('--validation_rate', type=float, default=0.05)
    p.add_argument('--nr_rate', type=float, default=0.65)
    p.add_argument('--chroma_subsampling_rate', type=float, default=0.5)
    p.add_argument('--reduce_memory_usage', action='store_true')
    p.add_argument('--out_size', type=int, default=64)
    p.add_argument('--max_size', type=int, default=256)
    p.add_argument('--active_cropping_rate', type=float, default=0.5)
    p.add_argument('--active_cropping_tries', type=int, default=10)
    p.add_argument('--random_half_rate', type=float, default=0.0)
    p.add_argument('--random_color_noise_rate', type=float, default=0.0)
    p.add_argument('--random_unsharp_mask_rate', type=float, default=0.0)
    p.add_argument('--learning_rate', type=float, default=0.00025)
    p.add_argument('--lr_min', type=float, default=0.00001)
    p.add_argument('--lr_decay', type=float, default=0.9)
    p.add_argument('--lr_decay_interval', type=int, default=5)
    p.add_argument('--batch_size', '-b', type=int, default=16)
    p.add_argument('--patches', '-p', type=int, default=64)
    p.add_argument('--validation_crop_rate', type=float, default=0.5)
    p.add_argument('--downsampling_filters', nargs='+', default=['box'])
    p.add_argument('--resize_blur_min', type=float, default=0.95)
    p.add_argument('--resize_blur_max', type=float, default=1.05)
    p.add_argument('--epoch', '-e', type=int, default=50)
    p.add_argument('--inner_epoch', type=int, default=4)
    p.add_argument('--finetune', '-f', default=None)
    p.add_argument('--model_name', default=None)
    p.add_argument('--color', '-c', default='rgb',
                   choices=['y', 'rgb'])
    p.add_argument('--arch', '-a', default='VGG7',
                   choices=['VGG7', '0', 'UpConv7', '1', 'ResNet10', '2', 'UpResNet10', '3'])
    p.add_argument('--method', '-m', default='scale',
                   choices=['noise', 'scale', 'noise_scale'],)
    p.add_argument('--noise_level', '-n', type=int, default=1,
                   choices=[0, 1, 2, 3])

    args = p.parse_args()
    if args.arch in srcnn.table:
        args.arch = srcnn.table[args.arch]

    utils.set_random_seed(args.seed, args.gpu)
    if args.color == 'y':
        ch = 1
        weight = (1.0,)
    elif args.color == 'rgb':
        ch = 3
        weight = (0.29891 * 3, 0.58661 * 3, 0.11448 * 3)
    weight = np.array(weight, dtype=np.float32)
    weight = weight[:, np.newaxis, np.newaxis]

    print('* loading filelist...', end=' ')
    filelist = utils.load_filelist(args.dataset_dir, shuffle=True)
    valid_num = int(np.ceil(args.validation_rate * len(filelist)))
    valid_list, train_list = filelist[:valid_num], filelist[valid_num:]
    print('done')

    print('* setup model...', end=' ')
    if args.model_name is None:
        if args.method == 'noise':
            model_name = 'anime_style_noise{}'.format(args.noise_level)
        elif args.method == 'scale':
            model_name = 'anime_style_scale'
        elif args.method == 'noise_scale':
            model_name = 'anime_style_noise{}_scale'.format(args.noise_level)
        model_path = '{}_{}.npz'.format(model_name, args.color)
    else:
        # str.rstrip('.npz') strips characters, not a suffix, so remove the extension explicitly
        model_name = args.model_name[:-len('.npz')] if args.model_name.endswith('.npz') else args.model_name
        model_path = model_name + '.npz'
    if not os.path.exists('epoch'):
        os.makedirs('epoch')

    model = srcnn.archs[args.arch](ch)
    if model.offset % model.inner_scale != 0:
        raise ValueError('offset % inner_scale must be 0.')
    elif model.inner_scale != 1 and model.inner_scale % 2 != 0:
        raise ValueError('inner_scale must be 1 or an even number.')
    if args.finetune is not None:
        chainer.serializers.load_npz(args.finetune, model)

    if args.gpu >= 0:
        chainer.backends.cuda.check_cuda_available()
        chainer.backends.cuda.get_device(args.gpu).use()
        weight = chainer.backends.cuda.to_gpu(weight)
        model.to_gpu()

    optimizer = optimizers.Adam(alpha=args.learning_rate)
    optimizer.setup(model)
    print('done')

    valid_config = utils.get_config(args, model, train=False)
    train_config = utils.get_config(args, model, train=True)

    print('* check forward path...', end=' ')
    di = train_config.in_size
    do = train_config.out_size
    dx = model.xp.zeros((args.batch_size, ch, di, di), dtype=np.float32)
    dy = model(dx)
    if dy.shape[2:] != (do, do):
        raise ValueError('Invalid output size\n'
                         'Expected: ({}, {})\n'
                         'Actual: {}'.format(do, do, dy.shape[2:]))
    print('done')

    print('* starting processes of dataset sampler...', end=' ')
    valid_queue = DatasetSampler(valid_list, valid_config)
    train_queue = DatasetSampler(train_list, train_config)
    print('done')

    best_count = 0
    best_score = 0
    best_loss = np.inf
    for epoch in range(0, args.epoch):
        print('### epoch: {} ###'.format(epoch))
        train_queue.reload_switch(init=(epoch < args.epoch - 1))
        for inner_epoch in range(0, args.inner_epoch):
            best_count += 1
            print('  # inner epoch: {}'.format(inner_epoch))
            start = time.time()
            train_loss = train_inner_epoch(
                model, weight, optimizer, train_queue, args.batch_size)
            if args.reduce_memory_usage:
                train_queue.wait()
            if train_loss < best_loss:
                best_loss = train_loss
                print('    * best loss on training dataset: {:.6f}'.format(
                    train_loss))
            valid_score = valid_inner_epoch(
                model, valid_queue, args.batch_size)
            if valid_score > best_score:
                best_count = 0
                best_score = valid_score
                print('    * best score on validation dataset: PSNR {:.6f} dB'
                      .format(valid_score))
                best_model = model.copy().to_cpu()
                epoch_path = 'epoch/{}_epoch{}.npz'.format(model_name, epoch)
                chainer.serializers.save_npz(model_path, best_model)
                shutil.copy(model_path, epoch_path)
            if best_count >= args.lr_decay_interval:
                best_count = 0
                optimizer.alpha *= args.lr_decay
                if optimizer.alpha < args.lr_min:
                    optimizer.alpha = args.lr_min
                else:
                    print('    * learning rate decay: {:.6f}'.format(
                        optimizer.alpha))
            print('    * elapsed time: {:.6f} sec'.format(time.time() - start))
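The schedule in the loop above is a step decay with a floor: the learning rate is multiplied by lr_decay whenever lr_decay_interval consecutive inner epochs pass without a new best validation score, and it never drops below lr_min. The same rule in isolation (the function name is illustrative; the parameters mirror the argparse flags above):

    def decayed_alpha(alpha, lr_decay, lr_min):
        """One decay step of the schedule above, clamped at the lr_min floor."""
        return max(alpha * lr_decay, lr_min)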
Example #9
0
def main():
    p = argparse.ArgumentParser()
    p.add_argument('--gpu', '-g', type=int, default=-1)
    p.add_argument('--input', '-i', default='../images/original.png')
    p.add_argument('--arch', '-a', default='')
    p.add_argument('--tta', '-t', action='store_true')
    p.add_argument('--batch_size', '-b', type=int, default=16)
    p.add_argument('--block_size', '-l', type=int, default=128)
    p.add_argument('--chroma_subsampling', '-j', action='store_true')
    p.add_argument('--downsampling_filter', '-d', default='box')
    p.add_argument('--method',
                   '-m',
                   default='scale',
                   choices=['scale', 'noise_scale'])
    p.add_argument('--noise_level', '-n', type=int, default=1, choices=[0, 1])
    p.add_argument('--color', '-c', default='rgb', choices=['y', 'rgb'])
    p.add_argument('--tta_level', '-T', type=int, default=8, choices=[2, 4, 8])

    args = p.parse_args()
    if args.arch in srcnn.table:
        args.arch = srcnn.table[args.arch]

    utils.set_random_seed(0, args.gpu)

    if os.path.isdir(args.input):
        filelist = utils.load_filelist(args.input)
    else:
        filelist = [args.input]

    images = []
    for path in filelist:
        basename = os.path.basename(path)
        _, ext = os.path.splitext(basename)
        if ext.lower() in ['.png', '.bmp', '.tif', '.tiff']:
            img = Image.open(path).convert('RGB')
            w, h = img.size[:2]
            img = img.crop((0, 0, w - (w % 2), h - (h % 2)))
            images.append(img)

    qualities = [50, 60, 70, 80, 90, 100]
    sampling_factor = '1x1,1x1,1x1'
    if args.chroma_subsampling:
        sampling_factor = '2x2,1x1,1x1'

    arch_scores = {}
    for arch in srcnn.table.values():
        args.arch = arch
        models = load_models(args)
        scores = []
        sems = []
        for quality in qualities:
            print(arch, quality)
            start = time.time()
            score, sem = benchmark(args, models, images, sampling_factor,
                                   quality)
            scores.append(score)
            sems.append(sem)
            print('Elapsed time: {:.6f} sec'.format(time.time() - start))
        arch_scores[arch] = [scores, sems]

    plt.rcParams['xtick.direction'] = 'out'
    plt.rcParams['ytick.direction'] = 'out'
    plt.rcParams['font.size'] = 12
    plt.rcParams['legend.fontsize'] = 12

    title = 'scale ({}) '.format(args.downsampling_filter)
    title += 'noise{} ({}) '.format(
        args.noise_level, sampling_factor) if 'noise' in args.method else ''
    title += 'tta{}'.format(args.tta_level) if args.tta else ''
    plt.title(title)
    plt.xlabel('JPEG quality')
    plt.ylabel('PSNR [dB]')
    plt.ylim(27.5, 42)
    if args.method == 'scale':
        plt.xticks([50, 60, 70, 80, 90, 100], [50, 60, 70, 80, 90, 'lossless'])
    else:
        plt.xticks([50, 60, 70, 80, 90, 100])
    plt.yticks([30, 35, 40])
    plt.gca().yaxis.set_minor_locator(tick.MultipleLocator(2.5))
    if args.method == 'noise_scale':
        if args.noise_level == 0:
            plt.axvspan(85, 100, color='b', alpha=0.1, lw=0)
        elif args.noise_level == 1:
            plt.axvspan(65, 90, color='b', alpha=0.1, lw=0)
    plt.grid(which='both', color='gray', linestyle='--')
    plt.gca().spines['right'].set_visible(False)
    plt.gca().spines['top'].set_visible(False)
    plt.gca().yaxis.set_ticks_position('left')
    plt.gca().xaxis.set_ticks_position('bottom')
    for key, value in arch_scores.items():
        plt.errorbar(qualities,
                     value[0],
                     yerr=value[1],
                     fmt='o-',
                     capsize=3,
                     label=key)
    plt.legend(loc='upper left', edgecolor='white')
    plt.show()
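The benchmark() helper that produces the plotted scores is not shown in this snippet. For reference, a minimal sketch of how PSNR is conventionally computed for 8-bit images; this is an illustration, not the repository's implementation:

    import numpy as np

    def psnr(a, b, max_value=255.0):
        """Peak signal-to-noise ratio in dB between two images of the same shape."""
        a = np.asarray(a, dtype=np.float64)
        b = np.asarray(b, dtype=np.float64)
        mse = np.mean((a - b) ** 2)
        return float('inf') if mse == 0 else 10.0 * np.log10((max_value ** 2) / mse)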
Example #10
0
def main():
    global args

    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available() and not args.use_cpu
    log_name = 'test.log' if args.evaluate else 'train.log'
    log_name += time.strftime('-%Y-%m-%d-%H-%M-%S')
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print('** Arguments **')
    arg_keys = list(args.__dict__.keys())
    arg_keys.sort()
    for key in arg_keys:
        print('{}: {}'.format(key, args.__dict__[key]))
    print('\n')
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))
    if use_gpu:
        torch.backends.cudnn.benchmark = True
        # torch.backends.cudnn.benchmark = False
        # torch.backends.cudnn.deterministic = True
    else:
        warnings.warn('Currently using CPU; however, GPU is highly recommended')

    # load data related args
    data_args = imagedata_kwargs(args)

    # initialize dataset
    dataset = init_image_dataset(name=data_args['source'], **data_args)

    # build data transformer
    transforms_tr, transforms_te = build_transforms(**data_args)

    # load train data
    trainset = dataset.train
    train_sampler = build_train_sampler(
        trainset, data_args['train_sampler'],
        batch_size=data_args['batch_size'],
        num_instances=data_args['num_instances'],
        num_train_pids=dataset.num_train_pids
    )
    trainloader = torch.utils.data.DataLoader(
        DataWarpper(data=trainset, transforms=transforms_tr),
        sampler=train_sampler,
        batch_size=data_args['batch_size'],
        shuffle=False,
        num_workers=data_args['workers'],
        pin_memory=False,
        drop_last=True,
    )

    # load test data
    queryset = dataset.query
    queryloader = torch.utils.data.DataLoader(
        DataWarpper(data=queryset, transforms=transforms_te),
        batch_size=data_args['batch_size'],
        shuffle=False,
        num_workers=data_args['workers'],
        pin_memory=False,
        drop_last=False
    )

    galleryset = dataset.gallery
    galleryloader = torch.utils.data.DataLoader(
        DataWarpper(data=galleryset, transforms=transforms_te),
        batch_size=data_args['batch_size'],
        shuffle=False,
        num_workers=data_args['workers'],
        pin_memory=False,
        drop_last=False
    )

    print('Building model: {}'.format(args.arch))
    model = build_model(
        name=args.arch,
        num_classes=dataset.num_train_pids,
        pretrained=(not args.no_pretrained),
        use_gpu=use_gpu,
        batch_size=args.batch_size,
        part_num=args.part_num,
        part_weight=args.part_weight
    )
    if use_gpu:
        model = model.cuda()

    # num_params, flops = compute_model_complexity(model, (1, 3, args.height, args.width))
    # print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))

    if args.load_weights and check_isfile(args.load_weights):
        load_pretrained_weights(model, args.load_weights)

    optimizer = build_optimizer(model, **optimizer_kwargs(args))

    scheduler = build_lr_scheduler(optimizer, **lr_scheduler_kwargs(args))

    model, optimizer = amp.initialize(model, optimizer,
                                      opt_level="O1",
                                      keep_batchnorm_fp32=None,
                                      loss_scale=None)

    if use_gpu:
        model = nn.DataParallel(model)

    if args.resume and check_isfile(args.resume):
        args.start_epoch = resume_from_checkpoint(args.resume, model, optimizer=optimizer)

    print('Building {}-engine for {}-reid'.format(args.loss, args.app))
    engine = Engine(trainloader, queryloader, galleryloader, model, optimizer, scheduler,
                    query=queryset, gallery=galleryset, use_gpu=use_gpu, num_train_pids=dataset.num_train_pids, **engine_kwargs(args))
    engine.run(**engine_kwargs(args), use_gpu=use_gpu)
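set_random_seed() is imported and called throughout these snippets but never defined here (the Chainer examples pass an extra gpu argument to their own variant). A typical implementation for the PyTorch snippets, offered as an assumption rather than the actual helper from any of these repositories:

    import random
    import numpy as np
    import torch

    def set_random_seed(seed):
        # hypothetical helper; each snippet above imports its own version
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)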
Example #11
0
                    default=True,
                    type=bool,
                    help="Use xyz as features or not.")
parser.add_argument('-len',
                    '--length',
                    default=-1,
                    type=int,
                    help="Number of partial samples for training and testing.")
parser.add_argument('-da',
                    '--data_arg',
                    default=False,
                    action='store_true',
                    help="Whether to use data argumentation")

if __name__ == "__main__":
    utils.set_random_seed(2020)

    args = parser.parse_args()
    print(args)
    args.length = None if args.length == -1 else args.length

    trainset = dataset.ModelNetDataset(mode="train", length=args.length)
    trainloader = DataLoader(trainset,
                             batch_size=args.batch_size,
                             shuffle=True,
                             num_workers=8)

    testset = dataset.ModelNetDataset(mode="test", length=args.length)
    testloader = DataLoader(testset,
                            batch_size=args.batch_size,
                            shuffle=False,