Example #1
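
This example pre-trains a Gaussian RBM and a Multinomial RBM on CIFAR-10, jointly trains a DBM on top of them, and finally fine-tunes an MLP initialized from the G-RBM weights. A minimal sketch of the imports the script relies on (argparse, os and numpy are standard; load_cifar10, RNG and the make_* helpers are assumed to be project utilities defined elsewhere in the same script):

# standard-library and numpy imports used below; project helpers
# (load_cifar10, RNG, make_grbm, make_mrbm, make_dbm, make_mlp,
#  make_rbm_transform, make_smoothing) are assumed to live in this script
import argparse
import os

import numpy as np
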
def main():
    # training settings
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # general
    parser.add_argument(
        '--gpu',
        type=str,
        default='0',
        metavar='ID',
        help="ID of the GPU to train on (or '' to train on CPU)")

    # data
    parser.add_argument('--n-train',
                        type=int,
                        default=49000,
                        metavar='N',
                        help='number of training examples')
    parser.add_argument('--n-val',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='number of validation examples')
    parser.add_argument('--data-path',
                        type=str,
                        default='../data/',
                        metavar='PATH',
                        help='directory for storing augmented data etc.')

    # common for RBMs and DBM
    parser.add_argument('--n-gibbs-steps',
                        type=int,
                        default=(1, 1, 1),
                        metavar='N',
                        nargs='+',
                        help='(initial) number of Gibbs steps for CD/PCD')
    parser.add_argument('--lr',
                        type=float,
                        default=(5e-4, 1e-4, 8e-5),
                        metavar='LR',
                        nargs='+',
                        help='(initial) learning rates')
    parser.add_argument('--epochs',
                        type=int,
                        default=(120, 180, 1500),
                        metavar='N',
                        nargs='+',
                        help='number of epochs to train')
    parser.add_argument('--batch-size',
                        type=int,
                        default=(100, 100, 100),
                        metavar='B',
                        nargs='+',
                        help='input batch size for training; `--n-train` and '
                             '`--n-val` must be divisible by this number '
                             '(for DBM)')
    parser.add_argument('--l2',
                        type=float,
                        default=(0.01, 0.05, 1e-8),
                        metavar='L2',
                        nargs='+',
                        help='L2 weight decay coefficients')
    parser.add_argument('--random-seed',
                        type=int,
                        default=(1337, 1111, 2222),
                        metavar='N',
                        nargs='+',
                        help='random seeds for model training')

    # save dirpaths
    parser.add_argument('--grbm-dirpath',
                        type=str,
                        default='../models/grbm_cifar_naive/',
                        metavar='DIRPATH',
                        help='directory path to save Gaussian RBM')
    parser.add_argument('--mrbm-dirpath',
                        type=str,
                        default='../models/mrbm_cifar_naive/',
                        metavar='DIRPATH',
                        help='directory path to save Multinomial RBM')
    parser.add_argument('--dbm-dirpath',
                        type=str,
                        default='../models/dbm_cifar_naive/',
                        metavar='DIRPATH',
                        help='directory path to save DBM')

    # DBM related
    parser.add_argument('--n-particles',
                        type=int,
                        default=100,
                        metavar='M',
                        help='number of persistent Markov chains')
    parser.add_argument(
        '--max-mf-updates',
        type=int,
        default=50,
        metavar='N',
        help='maximum number of mean-field updates per weight update')
    parser.add_argument('--mf-tol',
                        type=float,
                        default=1e-11,
                        metavar='TOL',
                        help='mean-field tolerance')
    parser.add_argument('--max-norm',
                        type=float,
                        default=4.,
                        metavar='C',
                        help='maximum norm constraint')

    # MLP related
    parser.add_argument('--mlp-no-init',
                        action='store_true',
                        help='if enabled, use random initialization')
    parser.add_argument('--mlp-l2',
                        type=float,
                        default=1e-4,
                        metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument('--mlp-lrm',
                        type=float,
                        default=(0.1, 1.),
                        metavar='LRM',
                        nargs='+',
                        help='learning rate multipliers (of the base rate 1e-3)')
    parser.add_argument('--mlp-epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train')
    parser.add_argument(
        '--mlp-val-metric',
        type=str,
        default='val_acc',
        metavar='S',
        help="validation metric used for early stopping, "
             "one of {'val_acc', 'val_loss'}")
    parser.add_argument('--mlp-batch-size',
                        type=int,
                        default=128,
                        metavar='N',
                        help='input batch size for training')
    parser.add_argument('--mlp-dropout',
                        type=float,
                        default=0.64,
                        metavar='P',
                        help='probability of visible units being set to zero')
    parser.add_argument('--mlp-save-prefix',
                        type=str,
                        default='../data/grbm_naive_',
                        metavar='PREFIX',
                        help='prefix to save MLP predictions and targets')

    # parse and check params
    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
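    # a single value for any of these hyperparameters is broadcast to all
    # three models (G-RBM, M-RBM, DBM); `x *= m` repeats the list in place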
    for x, m in (
        (args.n_gibbs_steps, 3),
        (args.lr, 3),
        (args.epochs, 3),
        (args.batch_size, 3),
        (args.l2, 3),
        (args.random_seed, 3),
    ):
        if len(x) == 1:
            x *= m

    # prepare data (load + scale + split)
    print "\nPreparing data ..."
    X, y = load_cifar10(mode='train', path=args.data_path)
    X = X.astype(np.float32)
    X /= 255.
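    # shuffling X and y with the same seed keeps examples and labels aligned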
    RNG(seed=42).shuffle(X)
    RNG(seed=42).shuffle(y)
    n_train = min(len(X), args.n_train)
    n_val = min(len(X), args.n_val)
    X_train = X[:n_train]
    X_val = X[-n_val:]
    y_train = y[:n_train]
    y_val = y[-n_val:]

    # remove 1000 least significant singular values
    X_train = make_smoothing(X_train, n_train, args)
    print(X_train.shape)

    # center and normalize training data
    X_s_mean = X_train.mean(axis=0)
    X_s_std = X_train.std(axis=0)
    mean_path = os.path.join(args.data_path, 'X_s_mean.npy')
    std_path = os.path.join(args.data_path, 'X_s_std.npy')
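    # the statistics are written once and reused, presumably so that later
    # runs (and test-time preprocessing) can apply the same normalization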
    if not os.path.isfile(mean_path):
        np.save(mean_path, X_s_mean)
    if not os.path.isfile(std_path):
        np.save(std_path, X_s_std)

    X_train -= X_s_mean
    X_train /= X_s_std
    X_val -= X_s_mean
    X_val /= X_s_std
    print "Mean: ({0:.3f}, ...); std: ({1:.3f}, ...)".format(
        X_train.mean(axis=0)[0],
        X_train.std(axis=0)[0])
    print "Range: ({0:.3f}, {1:.3f})\n\n".format(X_train.min(), X_train.max())

    # pre-train Gaussian RBM
    grbm = make_grbm((X_train, X_val), args)

    # extract features Q = p_{G-RBM}(h|v=X)
    print "\nExtracting features from G-RBM ...\n\n"
    Q_train, Q_val = None, None
    if not os.path.isdir(args.mrbm_dirpath) or not os.path.isdir(
            args.dbm_dirpath):
        Q_train_path = os.path.join(args.data_path, 'Q_train_cifar_naive.npy')
        Q_train = make_rbm_transform(grbm, X_train, Q_train_path)
    if not os.path.isdir(args.mrbm_dirpath):
        Q_val_path = os.path.join(args.data_path, 'Q_val_cifar_naive.npy')
        Q_val = make_rbm_transform(grbm, X_val, Q_val_path)

    # pre-train Multinomial RBM (M-RBM)
    mrbm = make_mrbm((Q_train, Q_val), args)

    # extract features G = p_{M-RBM}(h|v=Q)
    print "\nExtracting features from M-RBM ...\n\n"
    Q, G = None, None
    if not os.path.isdir(args.dbm_dirpath):
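        # features for the first M training examples, presumably used to
        # initialize the DBM's persistent (PCD) Markov chains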
        Q = Q_train[:args.n_particles]
        G_path = os.path.join(args.data_path, 'G_train_cifar_naive.npy')
        G = make_rbm_transform(mrbm, Q, G_path)

    # jointly train DBM
    dbm = make_dbm((X_train, X_val), (grbm, mrbm), (Q, G), args)

    # load test data
    X_test, y_test = load_cifar10(mode='test', path=args.data_path)
    X_test = X_test.astype(np.float32)  # cast before in-place scaling,
    X_test /= 255.                      # as done for the training data
    # normalize with the *training* mean/std
    X_test -= X_s_mean
    X_test /= X_s_std

    # G-RBM discriminative fine-tuning:
    # initialize MLP with learned weights,
    # add FC layer and train using backprop
    print "\nG-RBM Discriminative fine-tuning ...\n\n"

    W, hb = None, None
    if not args.mlp_no_init:
        weights = grbm.get_tf_params(scope='weights')
        W = weights['W']
        hb = weights['hb']

    make_mlp((X_train, y_train), (X_val, y_val), (X_test, y_test), (W, hb),
             args)
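
Example #2

This variant additionally augments the training data and initializes the large Gaussian RBM from many small RBMs trained on image patches. The same hedged imports as in Example #1 are assumed (standard argparse, os and numpy, plus project helpers such as load_cifar10, RNG, make_augmentation, make_small_rbms and the other make_* functions):

# standard-library and numpy imports used below; project helpers are
# assumed to be defined elsewhere in this script, as in Example #1
import argparse
import os

import numpy as np
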
def main():
    # training settings
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # general
    parser.add_argument(
        '--gpu',
        type=str,
        default='0',
        metavar='ID',
        help="ID of the GPU to train on (or '' to train on CPU)")

    # data
    parser.add_argument('--n-train',
                        type=int,
                        default=49000,
                        metavar='N',
                        help='number of training examples')
    parser.add_argument('--n-val',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='number of validation examples')
    parser.add_argument('--data-path',
                        type=str,
                        default='../data/',
                        metavar='PATH',
                        help='directory for storing augmented data etc.')
    parser.add_argument('--no-aug',
                        action='store_true',
                        help="if enabled, don't augment data")

    # small RBMs related
    parser.add_argument('--small-lr',
                        type=float,
                        default=1e-3,
                        metavar='LR',
                        nargs='+',
                        help='learning rate or sequence of such (per epoch)')
    parser.add_argument('--small-epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--small-batch-size',
                        type=int,
                        default=48,
                        metavar='B',
                        help='input batch size for training')
    parser.add_argument('--small-l2',
                        type=float,
                        default=1e-3,
                        metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument('--small-sparsity-target',
                        type=float,
                        default=0.1,
                        metavar='T',
                        help='desired probability of hidden activation')
    parser.add_argument('--small-sparsity-cost',
                        type=float,
                        default=1e-3,
                        metavar='C',
                        help='controls the amount of sparsity penalty')
    parser.add_argument('--small-random-seed',
                        type=int,
                        default=9000,
                        metavar='N',
                        help="random seeds for models training")
    parser.add_argument(
        '--small-dirpath-prefix',
        type=str,
        default='../models/rbm_cifar_small_',
        metavar='PREFIX',
        help='directory path prefix to save RBMs trained on patches')

    # M-RBM related
    parser.add_argument(
        '--increase-n-gibbs-steps-every',
        type=int,
        default=16,
        metavar='I',
        help=
        'increase number of Gibbs steps every specified number of epochs for M-RBM'
    )

    # common for RBMs and DBM
    parser.add_argument('--n-gibbs-steps',
                        type=int,
                        default=(1, 1, 1),
                        metavar='N',
                        nargs='+',
                        help='(initial) number of Gibbs steps for CD/PCD')
    parser.add_argument('--lr',
                        type=float,
                        default=(5e-4, 5e-5, 4e-5),
                        metavar='LR',
                        nargs='+',
                        help='(initial) learning rates')
    parser.add_argument('--epochs',
                        type=int,
                        default=(64, 33, 100),
                        metavar='N',
                        nargs='+',
                        help='number of epochs to train')
    parser.add_argument('--batch-size',
                        type=int,
                        default=(100, 100, 100),
                        metavar='B',
                        nargs='+',
                        help='input batch size for training; `--n-train` and '
                             '`--n-val` must be divisible by this number '
                             '(for DBM)')
    parser.add_argument('--l2',
                        type=float,
                        default=(1e-3, 0.005, 0.),
                        metavar='L2',
                        nargs='+',
                        help='L2 weight decay coefficients')
    parser.add_argument('--random-seed',
                        type=int,
                        default=(1111, 2222, 3333),
                        metavar='N',
                        nargs='+',
                        help='random seeds for model training')

    # save dirpaths
    parser.add_argument('--grbm-dirpath',
                        type=str,
                        default='../models/grbm_cifar/',
                        metavar='DIRPATH',
                        help='directory path to save Gaussian RBM')
    parser.add_argument('--mrbm-dirpath',
                        type=str,
                        default='../models/mrbm_cifar/',
                        metavar='DIRPATH',
                        help='directory path to save Multinomial RBM')
    parser.add_argument('--dbm-dirpath',
                        type=str,
                        default='../models/dbm_cifar/',
                        metavar='DIRPATH',
                        help='directory path to save DBM')

    # DBM related
    parser.add_argument('--n-particles',
                        type=int,
                        default=100,
                        metavar='M',
                        help='number of persistent Markov chains')
    parser.add_argument(
        '--max-mf-updates',
        type=int,
        default=50,
        metavar='N',
        help='maximum number of mean-field updates per weight update')
    parser.add_argument('--mf-tol',
                        type=float,
                        default=1e-11,
                        metavar='TOL',
                        help='mean-field tolerance')
    parser.add_argument('--max-norm',
                        type=float,
                        default=4.,
                        metavar='C',
                        help='maximum norm constraint')
    parser.add_argument('--sparsity-target',
                        type=float,
                        default=(0.2, 0.2),
                        metavar='T',
                        nargs='+',
                        help='desired probability of hidden activation')
    parser.add_argument('--sparsity-cost',
                        type=float,
                        default=(1e-4, 1e-3),
                        metavar='C',
                        nargs='+',
                        help='controls the amount of sparsity penalty')
    parser.add_argument('--sparsity-damping',
                        type=float,
                        default=0.9,
                        metavar='D',
                        help='decay rate for hidden activations probs')

    # MLP related
    parser.add_argument('--mlp-no-init',
                        action='store_true',
                        help='if enabled, use random initialization')
    parser.add_argument('--mlp-l2',
                        type=float,
                        default=1e-4,
                        metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument('--mlp-lrm',
                        type=float,
                        default=(0.01, 1.),
                        metavar='LRM',
                        nargs='+',
                        help='learning rate multipliers (of the base rate 1e-3)')
    parser.add_argument('--mlp-epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train')
    parser.add_argument(
        '--mlp-val-metric',
        type=str,
        default='val_acc',
        metavar='S',
        help="validation metric used for early stopping, "
             "one of {'val_acc', 'val_loss'}")
    parser.add_argument('--mlp-batch-size',
                        type=int,
                        default=128,
                        metavar='N',
                        help='input batch size for training')
    parser.add_argument('--mlp-dropout',
                        type=float,
                        default=0.7,
                        metavar='P',
                        help='probability of visible units being set to zero')
    parser.add_argument('--mlp-save-prefix',
                        type=str,
                        default='../data/grbm_',
                        metavar='PREFIX',
                        help='prefix to save MLP predictions and targets')

    # parse and check params
    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    for x, m in (
        (args.n_gibbs_steps, 3),
        (args.lr, 3),
        (args.epochs, 3),
        (args.batch_size, 3),
        (args.l2, 3),
        (args.random_seed, 3),
    ):
        if len(x) == 1:
            x *= m

    # prepare data (load + scale + split)
    print("\nPreparing data ...")
    X, y = load_cifar10(mode='train', path=args.data_path)
    X = X.astype(np.float32)
    X /= 255.
    RNG(seed=42).shuffle(X)
    RNG(seed=42).shuffle(y)
    n_train = min(len(X), args.n_train)
    n_val = min(len(X), args.n_val)
    X_train = X[:n_train]
    X_val = X[-n_val:]
    y_train = y[:n_train]
    y_val = y[-n_val:]

    if not args.no_aug:
        # augment data
        X_aug, y_train = make_augmentation(X_train, y_train, n_train, args)

        # convert + scale augmented data again
        X_train = X_aug.astype(np.float32)
        X_train /= 255.
        print("Augmented shape: {0}".format(X_train.shape))
        print("Augmented range: {0}".format((X_train.min(), X_train.max())))

    # center and normalize training data
    X_mean = X_train.mean(axis=0)
    X_std = X_train.std(axis=0)

    if not args.no_aug:
        mean_path = os.path.join(args.data_path, 'X_aug_mean.npy')
        std_path = os.path.join(args.data_path, 'X_aug_std.npy')
        if not os.path.isfile(mean_path):
            np.save(mean_path, X_mean)
        if not os.path.isfile(std_path):
            np.save(std_path, X_std)

    X_train -= X_mean
    X_train /= X_std
    X_val -= X_mean
    X_val /= X_std
    print("Augmented mean: ({0:.3f}, ...); std: ({1:.3f}, ...)".format(
        X_train.mean(axis=0)[0],
        X_train.std(axis=0)[0]))
    print("Augmented range: ({0:.3f}, {1:.3f})\n\n".format(
        X_train.min(), X_train.max()))

    # train 26 small Gaussian RBMs on patches
    small_rbms = None
    if not os.path.isdir(args.grbm_dirpath):
        small_rbms = make_small_rbms((X_train, X_val), args)
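    # (training many small RBMs on patches and stitching their weights
    # together presumably gives the large G-RBM a far better starting
    # point than random initialization)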

    # assemble large weight matrix and biases
    # and pre-train large Gaussian RBM (G-RBM)
    grbm = make_grbm((X_train, X_val), small_rbms, args)

    # extract features Q = p_{G-RBM}(h|v=X)
    print("\nExtracting features from G-RBM ...\n\n")
    Q_train, Q_val = None, None
    if not os.path.isdir(args.mrbm_dirpath) or not os.path.isdir(
            args.dbm_dirpath):
        Q_train_path = os.path.join(args.data_path, 'Q_train_cifar.npy')
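        # store features as float16; the augmented training set is large,
        # so this presumably halves memory and disk usage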
        Q_train = make_rbm_transform(grbm,
                                     X_train,
                                     Q_train_path,
                                     np_dtype=np.float16)
    if not os.path.isdir(args.mrbm_dirpath):
        Q_val_path = os.path.join(args.data_path, 'Q_val_cifar.npy')
        Q_val = make_rbm_transform(grbm, X_val, Q_val_path)

    # pre-train Multinomial RBM (M-RBM)
    mrbm = make_mrbm((Q_train, Q_val), args)

    # extract features G = p_{M-RBM}(h|v=Q)
    print("\nExtracting features from M-RBM ...\n\n")
    Q, G = None, None
    if not os.path.isdir(args.dbm_dirpath):
        Q = Q_train[:args.n_particles]
        G_path = os.path.join(args.data_path, 'G_train_cifar.npy')
        G = make_rbm_transform(mrbm, Q, G_path)

    # jointly train DBM
    dbm = make_dbm((X_train, X_val), (grbm, mrbm), (Q, G), args)

    # load test data
    X_test, y_test = load_cifar10(mode='test', path=args.data_path)
    X_test = X_test.astype(np.float32)  # cast before in-place scaling,
    X_test /= 255.                      # as done for the training data
    # normalize with the *training* mean/std
    X_test -= X_mean
    X_test /= X_std

    # G-RBM discriminative fine-tuning:
    # initialize MLP with learned weights,
    # add FC layer and train using backprop
    print("\nG-RBM Discriminative fine-tuning ...\n\n")

    W, hb = None, None
    if not args.mlp_no_init:
        weights = grbm.get_tf_params(scope='weights')
        W = weights['W']
        hb = weights['hb']

    make_mlp((X_train, y_train), (X_val, y_val), (X_test, y_test), (W, hb),
             args)