Example #1
def main():
    import argparse, os
    parser = argparse.ArgumentParser()
    # Common arguments
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--encode_bs', type=int, default=64)
    parser.add_argument('--black_box_jacobian_bs', type=int, default=None)
    parser.add_argument('--cpu', action='store_true')
    # Dataset arguments
    parser.add_argument('--dataset', type=str, required=True)
    parser.add_argument('--limit_dataset_size', type=int, default=None)
    parser.add_argument('--cifar10_data_path', type=str, default='data')
    parser.add_argument('--imagenet32_data_path',
                        type=str,
                        default='~/data/imagenet-small/valid_32x32.npy')
    parser.add_argument('--imagenet64_data_path',
                        type=str,
                        default='~/data/imagenet-small/valid_64x64.npy')
    # Model arguments
    parser.add_argument('--cifar10_model',
                        type=str,
                        default='~/data/flowpp_cifar_model.npz')
    parser.add_argument('--imagenet32_model',
                        type=str,
                        default='~/data/flowpp_imagenet32_model.npz')
    parser.add_argument('--imagenet64_model',
                        type=str,
                        default='~/data/flowpp_imagenet64_model.npz')
    # Script mode
    parser.add_argument('--mode', type=str, required=True)
    parser.add_argument('--test_output_filename', type=str, default=None)
    parser.add_argument('--timing_test_count', type=int, default=6)
    # Default compression options
    parser.add_argument('--neg_log_noise_scale', type=int, default=14)
    parser.add_argument('--disc_bits', type=int, default=32)
    parser.add_argument('--disc_range', type=int, default=256)
    parser.add_argument('--ans_init_bits', type=int, default=10000000)
    parser.add_argument('--ans_num_streams', type=int, default=16)
    parser.add_argument('--ans_mass_bits', type=int,
                        default=60)  # probably never need to change this
    args = parser.parse_args()

    setup(seed=args.seed)

    # Load data
    if args.dataset == 'imagenet32':
        model_ctor = compression.models.load_imagenet32_model
        model_filename = os.path.expanduser(args.imagenet32_model)
        dataset = load_imagenet_data(
            os.path.expanduser(args.imagenet32_data_path))
    elif args.dataset == 'imagenet64':
        model_ctor = compression.models.load_imagenet64_model
        model_filename = os.path.expanduser(args.imagenet64_model)
        dataset = load_imagenet_data(
            os.path.expanduser(args.imagenet64_data_path))
    elif args.dataset == 'cifar10':
        model_ctor = compression.models.load_cifar_model
        model_filename = os.path.expanduser(args.cifar10_model)
        dataset = CIFAR10WithoutLabels(
            root=os.path.expanduser(args.cifar10_data_path),
            train=False,
            download=True,
            transform=torchvision.transforms.Compose([
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Lambda(
                    lambda x_: (x_ * 255).to(dtype=torch.int64)),
            ]))
    else:
        raise NotImplementedError(args.dataset)
    dataloader, dataset = make_testing_dataloader(
        dataset,
        seed=args.seed,
        limit_dataset_size=args.limit_dataset_size,
        bs=args.encode_bs)

    # Load model
    device = torch.device('cpu' if args.cpu else 'cuda')
    model = model_ctor(model_filename,
                       force_float32_cond=True).to(device=device)

    # Dispatch to the chosen mode's main function

    if args.mode == 'val_only':
        num_datapoints_processed = main_val(dataloader, model, device)
        assert num_datapoints_processed == len(dataset)

    else:

        def _make_stream(total_init_bits_=None):
            return Bitstream(
                device=device,
                noise_scale=2**(-args.neg_log_noise_scale),
                disc_bits=args.disc_bits,
                disc_range=args.disc_range,
                ans_mass_bits=args.ans_mass_bits,
                ans_init_seed=0,
                ans_init_bits=(
                    int(np.ceil(total_init_bits_ / args.ans_num_streams))
                    if total_init_bits_ is not None
                    else args.ans_init_bits  # --ans_init_bits is the default
                ),
                ans_num_streams=args.ans_num_streams)

        if args.mode == 'test':
            assert args.test_output_filename is not None
            output = {
                'args': vars(args),
                'results': main_compression_test(
                    stream=_make_stream(),
                    model=model,
                    dataloader=dataloader,
                    device=device,
                ),
            }
            with open(args.test_output_filename, 'w') as f:
                f.write(json.dumps(output) + '\n')

        elif args.mode == 'timing_test_compositional':
            batches = []
            for (x_raw, ) in dataloader:
                batches.append(x_raw)
                if len(batches) >= args.timing_test_count:
                    break
            main_timing_test(batches,
                             model=model,
                             stream=_make_stream(),
                             device=device)

        elif args.mode == 'timing_test_blackbox':
            assert args.black_box_jacobian_bs is not None
            datapoints, = next(iter(dataloader))
            datapoints = datapoints[:args.timing_test_count]
            assert len(datapoints) == args.timing_test_count
            from compression.logistic import force_accurate_mixlogistic_invcdf
            with force_accurate_mixlogistic_invcdf():
                main_timing_test_blackbox(
                    datapoints=datapoints.to(device=device),
                    model=model,
                    bbstream=BlackBoxBitstream(
                        model_ctor=model_ctor,
                        model_filename=model_filename,
                        device=device,
                        jacobian_bs=args.black_box_jacobian_bs))
        else:
            raise NotImplementedError(args.mode)
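# A hypothetical invocation of the entry point above (the script name is an
# assumption; the flags are the ones the parser defines):
#
#   python run_compression.py --dataset cifar10 --mode test \
#       --test_output_filename results.json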
Example #2
import os

import torch

import numpy as np
import pandas as pd
from torch.utils.data import Dataset
from torchvision.transforms import transforms
from tqdm import tqdm
from encode import compress

from compression.utils import load_imagenet_data
from torchvision.utils import save_image

input_path = '/home/crhistyan/data/imagenet-small/valid_64x64.npy'
out_dir = 'real_image_size'
os.makedirs(out_dir, exist_ok=True)
dataset = load_imagenet_data(os.path.expanduser(input_path))
dataloader = torch.utils.data.DataLoader(dataset,
                                         batch_size=2,
                                         shuffle=False,
                                         drop_last=False)

sizes = []
webp_sizes = []
flif_sizes = []
for i_batch, (x_raw, ) in enumerate(tqdm(dataloader)):
    batch_size = 0
    webp_batch_size = 0
    flif_batch_size = 0
    for i, tensor in enumerate(x_raw):
        # Include the batch index so images from later batches do not
        # overwrite those from earlier ones.
        image_fn = os.path.join(out_dir, f'{i_batch}_{i}.png')
        save_image(tensor.type(torch.float32) / 255, image_fn)
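        # Hypothetical continuation -- the example is truncated here; record the
        # on-disk size of the PNG just written (webp_sizes and flif_sizes would
        # be gathered the same way with their respective encoders):
        batch_size += os.path.getsize(image_fn)
    sizes.append(batch_size)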
Example #3
def run(args):
    print('\nMODEL SETTINGS: \n', args, '\n')
    print("Random Seed: ", args.manual_seed)

    # ==================================================================================================================
    # SNAPSHOTS
    # ==================================================================================================================
    args.model_signature = str(datetime.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    os.makedirs(args.out_dir, exist_ok=True)
    snap_dir = args.out_dir

    with open(os.path.join(snap_dir, 'log.txt'), 'a') as ff:
        print('\nMODEL SETTINGS: \n', args, '\n', file=ff)

    # SAVING
    torch.save(args, snap_dir + '.config')

    # Load snapshot parameters
    parameters_dict = None
    if args.state_parameters is not None:
        assert os.path.isfile(args.state_parameters)
        parameters_dict = json.load(open(args.state_parameters))
        args.learning_rate = parameters_dict['scheduler']['_last_lr'][0]

    args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print('Device:', args.device)

    # ==================================================================================================================
    # LOAD DATA
    # ==================================================================================================================
    dataset = load_imagenet_data(os.path.expanduser(args.imagenet64_data_path))
    validation_dataset = load_imagenet_data(os.path.expanduser(args.imagenet64_valid_data_path))

    train_loader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, shuffle=True, drop_last=False)
    val_loader = torch.utils.data.DataLoader(validation_dataset, batch_size=args.batch_size, shuffle=True,
                                             drop_last=False)

    # test_loader is needed by the final evaluation below
    test_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              drop_last=False)

    args.input_size = [3, 64, 64]
    # ==================================================================================================================
    # SELECT MODEL
    # ==================================================================================================================
    # flow parameters and architecture choice are passed on to model through args
    print(args.input_size)

    from compression.models.load_flowpp_imagenet64 import Imagenet64Model

    # Load model
    if args.imagenet64_model is None:
        model = Imagenet64Model(force_float32_cond=True).eval()
    else:
        model_ctor = compression.models.load_imagenet64_model
        model_filename = os.path.expanduser(args.imagenet64_model)
        model = model_ctor(model_filename, force_float32_cond=True, from_torch=args.from_torch)

    model.to(device=args.device)

    model_sample = model

    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=args.gamma)

    # ==================================================================================================================
    # TRAINING
    # ==================================================================================================================
    train_bpd = []
    val_bpd = []

    # for early stopping
    best_val_bpd = np.inf
    best_val_loss = np.inf

    if args.state_parameters is None:
        last_epoch = 1
        run_number = 1
    else:
        last_epoch = parameters_dict['epoch']
        run_number = parameters_dict['run_number'] + 1
        scheduler.load_state_dict(parameters_dict['scheduler'])

    train_times = []
    model.double()

    for epoch in range(last_epoch, args.epochs + 1):
        t_start = time.time()
        if parameters_dict is not None:
            tr_loss, tr_bpd = train(epoch, train_loader, model, optimizer, args, scheduler,
                                    True, parameters_dict['batch_idx'], run_number)
        else:
            tr_loss, tr_bpd = train(epoch, train_loader, model, optimizer, args, scheduler, False)
        train_bpd.append(tr_bpd)
        train_times.append(time.time() - t_start)
        print('One training epoch took %.2f seconds' % (time.time() - t_start))

        if epoch < 5 or epoch % args.evaluate_interval_epochs == 0:
            v_loss, v_bpd = evaluate(
                val_loader, model, model_sample, args,
                epoch=epoch, file=os.path.join(snap_dir, 'log.txt'))

            val_bpd.append(v_bpd)

            best_val_bpd = min(v_bpd, best_val_bpd)
            best_val_loss = min(v_loss, best_val_loss)

            print('(BEST: val bpd {:.4f}, val loss {:.4f})\n'.format(best_val_bpd, best_val_loss))
            print(f'VALIDATION: loss: {v_loss}, bpd: {v_bpd}')

            if math.isnan(v_loss):
                raise ValueError('NaN encountered!')

    train_bpd = np.hstack(train_bpd)
    val_bpd = np.array(val_bpd)

    # training time per epoch
    train_times = np.array(train_times)
    mean_train_time = np.mean(train_times)
    std_train_time = np.std(train_times, ddof=1)
    print('Average train time per epoch: %.2f +/- %.2f' % (mean_train_time, std_train_time))

    # ==================================================================================================================
    # EVALUATION
    # ==================================================================================================================
    final_model = torch.load(os.path.join(snap_dir, 'a.model'))
    test_loss, test_bpd = evaluate(
        test_loader, final_model, final_model, args,
        epoch=epoch, file=os.path.join(snap_dir, 'test_log.txt'))

    print('Test loss / bpd: %.2f / %.2f' % (test_loss, test_bpd))
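# Shape of the --state_parameters JSON that run() reads above (keys inferred
# from the lookups in this example; values are illustrative only):
#
#   {
#     "epoch": 12,
#     "run_number": 3,
#     "batch_idx": 4500,
#     "scheduler": {"_last_lr": [0.0001], ...}   # a scheduler.state_dict()
#   }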
Example #4
def main():
    parser = argparse.ArgumentParser()
    # Common arguments
    parser.add_argument('input', type=str)
    parser.add_argument('--cpu', action='store_true')
    parser.add_argument('--imagenet64_model', type=str, default='~/data/flowpp_imagenet64_model.npz')
    parser.add_argument('-o', '--out_dir', type=str, default='lbb_compression')
    parser.add_argument('--encode_out', type=str, default='image')
    parser.add_argument('--decode_out', type=str, default='image.png')
    parser.add_argument('--write_crops', action='store_true')
    parser.add_argument('--single_image', action='store_true')
    parser.add_argument('--batch_size', type=int, default=1)

    # Default compression options
    parser.add_argument('--neg_log_noise_scale', type=int, default=14)
    parser.add_argument('--disc_bits', type=int, default=32)
    parser.add_argument('--disc_range', type=int, default=256)
    parser.add_argument('--ans_init_bits', type=int, default=10000000)
    parser.add_argument('--ans_num_streams', type=int, default=1)
    parser.add_argument('--ans_mass_bits', type=int, default=60)  # probably never need to change this
    args = parser.parse_args()

    device = torch.device('cpu' if args.cpu else 'cuda')
    args.device = device
    print(f'Using {device}')

    # Load model
    model_fn = os.path.expanduser(args.imagenet64_model)
    assert os.path.exists(model_fn), 'Model file not found'

    model_ctor = compression.models.load_imagenet64_model
    model = model_ctor(model_fn, force_float32_cond=False)

    # Prepare output
    args.out_dir = os.path.expanduser(args.out_dir)
    os.makedirs(args.out_dir, exist_ok=True)
    args.encode_out = os.path.join(args.out_dir, args.encode_out)
    args.decode_out = os.path.join(args.out_dir, args.decode_out)

    # Input files
    input_fn = os.path.expanduser(args.input)
    if args.single_image:
        assert os.path.exists(input_fn), 'Input image not found'
        print(f'Reading image from: {input_fn}')
        dataset, bs = load_image_and_crop(input_fn)

        dataloader, dataset = make_testing_dataloader(
            dataset, seed=0, limit_dataset_size=0, bs=args.batch_size
        )
    else:
        dataset = load_imagenet_data(os.path.expanduser(args.input))
        print('Number of images:', len(dataset))
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, shuffle=False, drop_last=False)

    for param in model.parameters():
        param.requires_grad = False

    # Prepare output file
    out_fn = os.path.join(args.out_dir, 'disk_write_evaluation.csv')
    columns = ['iteration', 'image_number', 'file_size', 'bpd']
    with open(out_fn, 'w') as f:
        f.write(','.join(columns) + '\n')

    stream = make_stream(vars(args))
    ok = encode(stream=stream, model=model, dataloader=dataloader, device=device,
                write_crops=args.write_crops, output_fn=args.encode_out, eval_fn=out_fn, batch_size=args.batch_size)
    if ok:
        print('Encode success')

    ok = decode(args.encode_out, model, write_crops=args.write_crops,
                stream=make_stream({'ans_init_bits': 2}))
    if ok:
        print('Decode success')
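# The dict handed to make_stream above is vars(args); a minimal sketch of the
# compression-related keys it carries (names and defaults come from the parser;
# how make_stream treats missing keys is an assumption):
#
#   {
#     'neg_log_noise_scale': 14, 'disc_bits': 32, 'disc_range': 256,
#     'ans_init_bits': 10000000, 'ans_num_streams': 1, 'ans_mass_bits': 60,
#   }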
Example #5
def run(args):
    print('\nMODEL SETTINGS: \n', args, '\n')
    print("Random Seed: ", args.manual_seed)

    # ==================================================================================================================
    # SNAPSHOTS
    # ==================================================================================================================
    args.model_signature = str(datetime.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    os.makedirs(args.out_dir, exist_ok=True)
    snap_dir = args.out_dir

    with open(os.path.join(snap_dir, 'log.txt'), 'a') as ff:
        print('\nMODEL SETTINGS: \n', args, '\n', file=ff)

    # SAVING
    torch.save(args, snap_dir + '.config')

    # ==================================================================================================================
    # LOAD DATA
    # ==================================================================================================================
    validation_dataset = load_imagenet_data(
        os.path.expanduser(args.imagenet64_valid_data_path))
    val_loader = torch.utils.data.DataLoader(validation_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             drop_last=False)
    args.input_size = [3, 64, 64]

    # ==================================================================================================================
    # SELECT MODEL
    # ==================================================================================================================
    # flow parameters and architecture choice are passed on to model through args
    print(args.input_size)

    from compression.models.load_flowpp_imagenet64 import Imagenet64Model
    args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    models_dir = os.path.expanduser(args.imagenet64_model_dir)
    filenames = glob.glob(os.path.join(models_dir, '*.npz'))

    out_fn = 'validation.csv'
    columns = ['val_bpd', 'val_loss']
    with open(out_fn, 'w') as f:
        f.write(','.join(columns) + '\n')

    val_bpd = []
    val_loss = []
    for model_filename in filenames:
        # Load model
        model_ctor = compression.models.load_imagenet64_model
        model_filename = os.path.expanduser(model_filename)
        model = model_ctor(model_filename,
                           force_float32_cond=True,
                           from_torch=True)

        model.to(device=args.device)
        print('Device:', args.device)

        model_sample = model
        model.double()

        v_loss, v_bpd = evaluate(val_loader,
                                 model,
                                 model_sample,
                                 args,
                                 file=os.path.join(snap_dir, 'log.txt'))
        val_loss.append(v_loss)
        val_bpd.append(v_bpd)
        with open(out_fn, 'a') as f:
            f.write(f'{v_bpd},{v_loss}\n')
        print(f'VALIDATION: loss: {v_loss}, bpd: {v_bpd}')

        if math.isnan(v_loss):
            raise ValueError('NaN encountered!')

    val_bpd = np.array(val_bpd)
    val_loss = np.array(val_loss)

    name = 'Validation BPD'
    fig, ax = plt.subplots()
    t = range(val_bpd.shape[0])
    ax.plot(t, val_bpd)
    ax.set(xlabel='batch_id', ylabel='', title=name)
    fig.savefig(os.path.join(args.out_dir, f"{name}.png"))
    plt.show()

    name = 'Validation Loss'
    fig, ax = plt.subplots()
    t = range(val_loss.shape[0])
    ax.plot(t, val_loss)
    ax.set(xlabel='batch_id', ylabel='', title=name)
    fig.savefig(os.path.join(args.out_dir, f"{name}.png"))
    plt.show()
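# A small helper equivalent to the two plotting blocks above; saving before
# plt.show() avoids writing a blank figure with some interactive backends
# (sketch only; assumes matplotlib.pyplot is imported as plt):
def plot_metric(values, name, out_dir):
    fig, ax = plt.subplots()
    ax.plot(range(len(values)), values)
    ax.set(xlabel='batch_id', title=name)
    fig.savefig(os.path.join(out_dir, f'{name}.png'))
    plt.show()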
Example #6
def main():
    parser = argparse.ArgumentParser()
    # Common arguments
    parser.add_argument('input', type=str)
    parser.add_argument('--cpu', action='store_true')
    parser.add_argument('--from_torch', action='store_true')
    parser.add_argument('--batch_size', type=int, default=2)
    parser.add_argument('--imagenet64_model',
                        type=str,
                        default='~/data/flowpp_imagenet64_model.npz')
    parser.add_argument('-o', '--out_dir', type=str, default='lbb_evaluation')
    args = parser.parse_args()

    device = torch.device('cpu' if args.cpu else 'cuda')
    print(f'Using {device}')

    # Load model
    model_fn = os.path.expanduser(args.imagenet64_model)
    assert os.path.exists(model_fn), 'Model file not found'

    model_ctor = compression.models.load_imagenet64_model
    model = model_ctor(model_fn,
                       force_float32_cond=False,
                       from_torch=args.from_torch)
    for param in model.parameters():
        param.requires_grad = False

    # Prepare output
    args.out_dir = os.path.expanduser(args.out_dir)
    os.makedirs(args.out_dir, exist_ok=True)

    # Input files
    input_fn = os.path.expanduser(args.input)
    assert os.path.exists(input_fn), 'Input file not found'
    print(f'Reading images from: {input_fn}')

    out_fn = os.path.join(args.out_dir, 'evaluation.csv')
    columns = ['iteration', 'min_bits', 'stream_len']
    with open(out_fn, 'w') as f:
        f.write(','.join(columns) + '\n')

    batch_test = False
    if batch_test:
        # Read input image
        dataset, num_crops = load_image_and_crop(input_fn)
        for bs in range(1, 20):
            dataloader, dataset = make_testing_dataloader(dataset,
                                                          seed=0,
                                                          limit_dataset_size=0,
                                                          bs=bs)
            stream = lbb.make_stream({'ans_init_bits': 10000000})

            ok = lbb.encode(stream=stream,
                            model=model,
                            dataloader=dataloader,
                            device=device,
                            output_fn='image',
                            write=False,
                            run_once=True)
            dict_stream = stream.to_dict(orig=True)

            columns = [
                str(bs),
                str(dict_stream['ans'][0]['min_size']),
                str(len(stream))
            ]
            print(columns)
            with open(out_fn, 'a') as f:
                f.write(','.join(columns) + '\n')
    else:
        dataset = load_imagenet_data(os.path.expanduser(args.input))
        dataloader = torch.utils.data.DataLoader(dataset,
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 drop_last=False)
        stream = lbb.make_stream({'ans_init_bits': 10000000})

        for i_batch, (x_raw, ) in enumerate(dataloader):
            ok = lbb.encode_image(stream, model, device, i_batch, x_raw)
            dict_stream = stream.to_dict(orig=True)

            columns = [
                str(args.batch_size * (i_batch + 1)),
                str(dict_stream['ans'][0]['min_size']),
                str(len(stream))
            ]
            print(columns)
            with open(out_fn, 'a') as f:
                f.write(','.join(columns) + '\n')
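# A hypothetical way to inspect the CSV this example writes (column names are
# the ones written above; the path uses the default --out_dir):
import pandas as pd

df = pd.read_csv('lbb_evaluation/evaluation.csv')
print(df.describe())                      # spread of min_bits and stream_len
print(df['stream_len'].diff().dropna())   # per-batch growth of the stream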