Example #1
def __init__(self):
    self.n_visible = 12
    self.n_hidden = 8
    self.X = RNG(seed=1337).rand(16, self.n_visible)
    self.X_val = RNG(seed=42).rand(8, self.n_visible)
    self.rbm_config = dict(n_visible=self.n_visible, n_hidden=self.n_hidden,
                           sample_v_states=True, sample_h_states=True,
                           dropout=0.9,
                           verbose=False, display_filters=False,
                           random_seed=1337)
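The fixture above seeds its data with the project's RNG helper, so every test run sees identical arrays. A minimal reproducibility check, assuming RNG behaves like numpy.random.RandomState (an assumption, not confirmed by this snippet):

import numpy as np

RNG = np.random.RandomState  # assumed stand-in for the project's RNG

a = RNG(seed=1337).rand(16, 12)
b = RNG(seed=1337).rand(16, 12)
assert np.array_equal(a, b)  # same seed -> identical fixture data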
Example #2
File: mixin.py Project: enijkamp/rbm
class SeedMixin(BaseMixin):
    def __init__(self, random_seed=None, *args, **kwargs):
        super(SeedMixin, self).__init__(*args, **kwargs)
        self.random_seed = random_seed
        self._rng = RNG(seed=self.random_seed)

    def make_random_seed(self):
        return self._rng.randint(2**31 - 1)
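A minimal usage sketch of the mixin; RNG is again assumed to behave like numpy.random.RandomState, and BaseMixin is replaced by a trivial stand-in:

import numpy as np

RNG = np.random.RandomState  # assumed stand-in for the project's RNG

class BaseMixin(object):  # trivial stand-in for the project's BaseMixin
    def __init__(self, *args, **kwargs):
        pass

class SeedMixin(BaseMixin):
    def __init__(self, random_seed=None, *args, **kwargs):
        super(SeedMixin, self).__init__(*args, **kwargs)
        self.random_seed = random_seed
        self._rng = RNG(seed=self.random_seed)

    def make_random_seed(self):
        # draw a fresh seed in [0, 2**31 - 1) from the private stream
        return self._rng.randint(2**31 - 1)

m = SeedMixin(random_seed=1337)
print(m.make_random_seed())  # deterministic: same first value for this seed
print(m.make_random_seed())  # next draw from the same stream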
Example #3
def make_augmentation(X_train, y_train, n_train, args):
    X_aug = None
    X_aug_path = os.path.join(args.data_path, 'X_aug.npy')
    y_train = y_train.tolist() * 10
    RNG(seed=1337).shuffle(y_train)

    augment = True
    if os.path.isfile(X_aug_path):
        print "\nLoading augmented data ..."
        X_aug = np.load(X_aug_path)
        print "Checking augmented data ..."
        if len(X_aug) == 10 * n_train:
            augment = False

    if augment:
        print "\nAugmenting data ..."
        s = Stopwatch(verbose=True).start()

        X_aug = np.zeros((10 * n_train, 32, 32, 3), dtype=np.float32)
        X_train = im_unflatten(X_train)
        X_aug[:n_train] = X_train
        for i in range(n_train):
            for k, offset in enumerate(((1, 0), (-1, 0), (0, 1), (0, -1))):
                img = X_train[i].copy()
                X_aug[(k + 1) * n_train + i] = shift(img, offset=offset)
        for i in range(5 * n_train):
            X_aug[5 * n_train + i] = horizontal_mirror(X_aug[i].copy())

        # shuffle once again
        RNG(seed=1337).shuffle(X_aug)

        # convert to 'uint8' type to save disk space
        X_aug *= 255.
        X_aug = X_aug.astype('uint8')

        # flatten to (10 * `n_train`, 3072) shape
        X_aug = im_flatten(X_aug)

        # save to disk
        np.save(X_aug_path, X_aug)

        s.elapsed()
        print "\n"

    return X_aug, y_train
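The shift and horizontal_mirror helpers are not shown on this page; a plausible NumPy sketch of what the augmentation loop needs from them (a one-pixel translation with zero fill, and a left-right flip), offered as an assumption rather than the project's exact code:

import numpy as np

def shift(img, offset):
    # translate an HxWxC image by (dy, dx) pixels, zero-filling the border
    dy, dx = offset
    h, w = img.shape[:2]
    out = np.zeros_like(img)
    out[max(dy, 0):h + min(dy, 0), max(dx, 0):w + min(dx, 0)] = \
        img[max(-dy, 0):h + min(-dy, 0), max(-dx, 0):w + min(-dx, 0)]
    return out

def horizontal_mirror(img):
    # flip an HxWxC image left-to-right
    return img[:, ::-1, :].copy()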
Example #4
def make_large_weights(small_rbms):
    W = np.zeros((300 * 26, 32, 32, 3), dtype=np.float32)
    W[...] = RNG(seed=1234).rand(*W.shape) * 5e-6
    vb = np.zeros((32, 32, 3))
    hb = np.zeros(300 * 26)

    for i in range(4):
        for j in range(4):
            rbm_id = 4 * i + j
            weights = small_rbms[rbm_id].get_tf_params(scope='weights')
            W_small = weights['W']
            W_small = W_small.T  # (300, 192)
            W_small = im_unflatten(W_small)  # (300, 8, 8, 3)
            W[300 * rbm_id:300 * (rbm_id + 1), 8 * i:8 * (i + 1),
              8 * j:8 * (j + 1), :] = W_small
            vb[8 * i:8 * (i + 1),
               8 * j:8 * (j + 1), :] += im_unflatten(weights['vb'])
            hb[300 * rbm_id:300 * (rbm_id + 1)] = weights['hb']

    for i in range(3):
        for j in range(3):
            rbm_id = 16 + 3 * i + j
            weights = small_rbms[rbm_id].get_tf_params(scope='weights')
            W_small = weights['W']
            W_small = W_small.T
            W_small = im_unflatten(W_small)
            W[300 * rbm_id:300 * (rbm_id + 1), 4 + 8 * i:4 + 8 * (i + 1),
              4 + 8 * j:4 + 8 * (j + 1), :] = W_small
            vb[4 + 8 * i:4 + 8 * (i + 1),
               4 + 8 * j:4 + 8 * (j + 1), :] += im_unflatten(weights['vb'])
            hb[300 * rbm_id:300 * (rbm_id + 1)] = weights['hb']

    weights = small_rbms[25].get_tf_params(scope='weights')
    W_small = weights['W']
    W_small = W_small.T
    W_small = im_unflatten(W_small)
    vb_small = im_unflatten(weights['vb'])
    for i in range(8):
        for j in range(8):
            U = W_small[:, i, j, :]
            U = np.expand_dims(U, -1)
            U = np.expand_dims(U, -1)
            U = U.transpose(0, 2, 3, 1)
            W[-300:, 4 * i:4 * (i + 1), 4 * j:4 * (j + 1), :] = U / 16.
            vb[4 * i:4 * (i + 1),
               4 * j:4 * (j + 1), :] += vb_small[i, j, :].reshape(
                   (1, 1, 3)) / 16.
            hb[-300:] = weights['hb']

    W = im_flatten(W)
    W = W.T
    vb /= 2.
    vb[4:-4, 4:-4, :] /= 1.5
    vb = im_flatten(vb)

    return W, vb, hb
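im_flatten and im_unflatten convert between flat rows and channel-last image blocks, with the side length inferred from the vector size (192 -> 8x8x3 patches, 3072 -> 32x32x3 images). A minimal sketch under that assumption; the project's own helpers may order the axes differently:

import numpy as np

def im_unflatten(x):
    # (..., s*s*3) -> (..., s, s, 3); side s inferred from the last axis
    x = np.asarray(x)
    s = int(np.sqrt(x.shape[-1] // 3))
    return x.reshape(x.shape[:-1] + (s, s, 3))

def im_flatten(x):
    # inverse: collapse the trailing (s, s, 3) axes into one
    x = np.asarray(x)
    return x.reshape(x.shape[:-3] + (-1,))

W = np.zeros((7800, 32, 32, 3), dtype=np.float32)
assert im_flatten(W).shape == (7800, 3072)
assert im_unflatten(im_flatten(W)).shape == W.shape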
Example #5
def main():
    # training settings
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # general
    parser.add_argument(
        '--gpu',
        type=str,
        default='0',
        metavar='ID',
        help="ID of the GPU to train on (or '' to train on CPU)")

    # data
    parser.add_argument('--n-train',
                        type=int,
                        default=49000,
                        metavar='N',
                        help='number of training examples')
    parser.add_argument('--n-val',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='number of validation examples')
    parser.add_argument('--data-path',
                        type=str,
                        default='../data/',
                        metavar='PATH',
                        help='directory for storing augmented data etc.')

    # common for RBMs and DBM
    parser.add_argument('--n-gibbs-steps',
                        type=int,
                        default=(1, 1, 1),
                        metavar='N',
                        nargs='+',
                        help='(initial) number of Gibbs steps for CD/PCD')
    parser.add_argument('--lr',
                        type=float,
                        default=(5e-4, 1e-4, 8e-5),
                        metavar='LR',
                        nargs='+',
                        help='(initial) learning rates')
    parser.add_argument('--epochs',
                        type=int,
                        default=(120, 180, 1500),
                        metavar='N',
                        nargs='+',
                        help='number of epochs to train')
    parser.add_argument('--batch-size', type=int, default=(100, 100, 100), metavar='B', nargs='+',
                        help='input batch size for training, `--n-train` and `--n-val`' + \
                             ' must be divisible by this number (for DBM)')
    parser.add_argument('--l2',
                        type=float,
                        default=(0.01, 0.05, 1e-8),
                        metavar='L2',
                        nargs='+',
                        help='L2 weight decay coefficients')
    parser.add_argument('--random-seed',
                        type=int,
                        default=(1337, 1111, 2222),
                        metavar='N',
                        nargs='+',
                        help='random seeds for training the models')

    # save dirpaths
    parser.add_argument('--grbm-dirpath',
                        type=str,
                        default='../models/grbm_cifar_naive/',
                        metavar='DIRPATH',
                        help='directory path to save Gaussian RBM')
    parser.add_argument('--mrbm-dirpath',
                        type=str,
                        default='../models/mrbm_cifar_naive/',
                        metavar='DIRPATH',
                        help='directory path to save Multinomial RBM')
    parser.add_argument('--dbm-dirpath',
                        type=str,
                        default='../models/dbm_cifar_naive/',
                        metavar='DIRPATH',
                        help='directory path to save DBM')

    # DBM related
    parser.add_argument('--n-particles',
                        type=int,
                        default=100,
                        metavar='M',
                        help='number of persistent Markov chains')
    parser.add_argument(
        '--max-mf-updates',
        type=int,
        default=50,
        metavar='N',
        help='maximum number of mean-field updates per weight update')
    parser.add_argument('--mf-tol',
                        type=float,
                        default=1e-11,
                        metavar='TOL',
                        help='mean-field tolerance')
    parser.add_argument('--max-norm',
                        type=float,
                        default=4.,
                        metavar='C',
                        help='maximum norm constraint')

    # MLP related
    parser.add_argument('--mlp-no-init',
                        action='store_true',
                        help='if enabled, use random initialization')
    parser.add_argument('--mlp-l2',
                        type=float,
                        default=1e-4,
                        metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument('--mlp-lrm',
                        type=float,
                        default=(0.1, 1.),
                        metavar='LRM',
                        nargs='+',
                        help='learning rate multipliers for the base learning rate of 1e-3')
    parser.add_argument('--mlp-epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train')
    parser.add_argument(
        '--mlp-val-metric',
        type=str,
        default='val_acc',
        metavar='S',
        help=
        "metric on validation set to perform early stopping, {'val_acc', 'val_loss'}"
    )
    parser.add_argument('--mlp-batch-size',
                        type=int,
                        default=128,
                        metavar='N',
                        help='input batch size for training')
    parser.add_argument('--mlp-dropout',
                        type=float,
                        default=0.64,
                        metavar='P',
                        help='probability of visible units being set to zero')
    parser.add_argument('--mlp-save-prefix',
                        type=str,
                        default='../data/grbm_naive_',
                        metavar='PREFIX',
                        help='prefix to save MLP predictions and targets')

    # parse and check params
    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    for x, m in (
        (args.n_gibbs_steps, 3),
        (args.lr, 3),
        (args.epochs, 3),
        (args.batch_size, 3),
        (args.l2, 3),
        (args.random_seed, 3),
    ):
        if len(x) == 1:
            x *= m

    # prepare data (load + scale + split)
    print "\nPreparing data ..."
    X, y = load_cifar10(mode='train', path=args.data_path)
    X = X.astype(np.float32)
    X /= 255.
    RNG(seed=42).shuffle(X)
    RNG(seed=42).shuffle(y)
    n_train = min(len(X), args.n_train)
    n_val = min(len(X), args.n_val)
    X_train = X[:n_train]
    X_val = X[-n_val:]
    y_train = y[:n_train]
    y_val = y[-n_val:]

    # remove 1000 least significant singular values
    X_train = make_smoothing(X_train, n_train, args)
    print(X_train.shape)

    # center and normalize training data
    X_s_mean = X_train.mean(axis=0)
    X_s_std = X_train.std(axis=0)
    mean_path = os.path.join(args.data_path, 'X_s_mean.npy')
    std_path = os.path.join(args.data_path, 'X_s_std.npy')
    if not os.path.isfile(mean_path):
        np.save(mean_path, X_s_mean)
    if not os.path.isfile(std_path):
        np.save(std_path, X_s_std)

    X_train -= X_s_mean
    X_train /= X_s_std
    X_val -= X_s_mean
    X_val /= X_s_std
    print "Mean: ({0:.3f}, ...); std: ({1:.3f}, ...)".format(
        X_train.mean(axis=0)[0],
        X_train.std(axis=0)[0])
    print "Range: ({0:.3f}, {1:.3f})\n\n".format(X_train.min(), X_train.max())

    # pre-train Gaussian RBM
    grbm = make_grbm((X_train, X_val), args)

    # extract features Q = p_{G-RBM}(h|v=X)
    print "\nExtracting features from G-RBM ...\n\n"
    Q_train, Q_val = None, None
    if not os.path.isdir(args.mrbm_dirpath) or not os.path.isdir(
            args.dbm_dirpath):
        Q_train_path = os.path.join(args.data_path, 'Q_train_cifar_naive.npy')
        Q_train = make_rbm_transform(grbm, X_train, Q_train_path)
    if not os.path.isdir(args.mrbm_dirpath):
        Q_val_path = os.path.join(args.data_path, 'Q_val_cifar_naive.npy')
        Q_val = make_rbm_transform(grbm, X_val, Q_val_path)

    # pre-train Multinomial RBM (M-RBM)
    mrbm = make_mrbm((Q_train, Q_val), args)

    # extract features G = p_{M-RBM}(h|v=Q)
    print "\nExtracting features from M-RBM ...\n\n"
    Q, G = None, None
    if not os.path.isdir(args.dbm_dirpath):
        Q = Q_train[:args.n_particles]
        G_path = os.path.join(args.data_path, 'G_train_cifar_naive.npy')
        G = make_rbm_transform(mrbm, Q, G_path)

    # jointly train DBM
    dbm = make_dbm((X_train, X_val), (grbm, mrbm), (Q, G), args)

    # load test data
    X_test, y_test = load_cifar10(mode='test', path=args.data_path)
    X_test = X_test.astype(np.float32)  # same dtype handling as the training data
    X_test /= 255.
    X_test -= X_s_mean
    X_test /= X_s_std

    # G-RBM discriminative fine-tuning:
    # initialize MLP with learned weights,
    # add FC layer and train using backprop
    print "\nG-RBM Discriminative fine-tuning ...\n\n"

    W, hb = None, None
    if not args.mlp_no_init:
        weights = grbm.get_tf_params(scope='weights')
        W = weights['W']
        hb = weights['hb']

    make_mlp((X_train, y_train), (X_val, y_val), (X_test, y_test), (W, hb),
             args)
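The parameter-checking loop near the top of main() relies on argparse returning a mutable list for nargs='+' options: x *= m repeats that list in place, so a single supplied value (e.g. --lr 1e-4) is broadcast to one entry per model. (A tuple default would only be rebound locally, not extended in place, but every default here already has full length, so the branch only ever fires for parsed lists.) A standalone illustration of the idiom:

lr = [1e-4]      # what argparse yields when a single --lr value is given
args_lr = lr     # args.lr references the same list object
lr *= 3          # in-place repetition, visible through args.lr
assert args_lr == [1e-4, 1e-4, 1e-4]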
Example #6
def main():
    # training settings
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # general/data
    parser.add_argument(
        '--gpu',
        type=str,
        default='0',
        metavar='ID',
        help="ID of the GPU to train on (or '' to train on CPU)")
    parser.add_argument('--n-train',
                        type=int,
                        default=55000,
                        metavar='N',
                        help='number of training examples')
    parser.add_argument('--n-val',
                        type=int,
                        default=5000,
                        metavar='N',
                        help='number of validation examples')
    parser.add_argument('--data-path',
                        type=str,
                        default='../data/',
                        metavar='PATH',
                        help='directory for storing augmented data etc.')

    # RBM related
    parser.add_argument('--n-hidden',
                        type=int,
                        default=1024,
                        metavar='N',
                        help='number of hidden units')
    parser.add_argument(
        '--w-init',
        type=float,
        default=0.01,
        metavar='STD',
        help=
        'initialize weights from zero-centered Gaussian with this standard deviation'
    )
    parser.add_argument('--vb-init', action='store_false',
                        help='by default, initialize visible biases as logit of mean values' + \
                             ' of features; if this flag is given, use zero init instead')
    parser.add_argument('--hb-init',
                        type=float,
                        default=0.,
                        metavar='HB',
                        help='initial hidden bias')
    parser.add_argument(
        '--n-gibbs-steps',
        type=int,
        default=1,
        metavar='N',
        nargs='+',
        help=
        'number of Gibbs updates per weights update or sequence of such (per epoch)'
    )
    parser.add_argument('--lr',
                        type=float,
                        default=0.05,
                        metavar='LR',
                        nargs='+',
                        help='learning rate or sequence of such (per epoch)')
    parser.add_argument('--epochs',
                        type=int,
                        default=120,
                        metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--batch-size',
                        type=int,
                        default=10,
                        metavar='B',
                        help='input batch size for training')
    parser.add_argument('--l2',
                        type=float,
                        default=1e-5,
                        metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument(
        '--sample-v-states',
        action='store_true',
        help='sample visible states, otherwise use probabilities w/o sampling')
    parser.add_argument('--dropout',
                        type=float,
                        metavar='P',
                        help='probability of visible units being on')
    parser.add_argument('--sparsity-target',
                        type=float,
                        default=0.1,
                        metavar='T',
                        help='desired probability of hidden activation')
    parser.add_argument('--sparsity-cost',
                        type=float,
                        default=1e-5,
                        metavar='C',
                        help='controls the amount of sparsity penalty')
    parser.add_argument('--sparsity-damping',
                        type=float,
                        default=0.9,
                        metavar='D',
                        help='decay rate for hidden activations probs')
    parser.add_argument('--random-seed',
                        type=int,
                        default=1337,
                        metavar='N',
                        help="random seed for model training")
    parser.add_argument('--dtype',
                        type=str,
                        default='float32',
                        metavar='T',
                        help="datatype precision to use")
    parser.add_argument('--model-dirpath',
                        type=str,
                        default='../models/rbm_mnist/',
                        metavar='DIRPATH',
                        help='directory path to save the model')

    # MLP related
    parser.add_argument('--mlp-no-init',
                        action='store_true',
                        help='if enabled, use random initialization')
    parser.add_argument('--mlp-l2',
                        type=float,
                        default=1e-5,
                        metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument('--mlp-lrm',
                        type=float,
                        default=(0.1, 1.),
                        metavar='LRM',
                        nargs='+',
                        help='learning rate multipliers for the base learning rate of 1e-3')
    parser.add_argument('--mlp-epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train')
    parser.add_argument(
        '--mlp-val-metric',
        type=str,
        default='val_acc',
        metavar='S',
        help=
        "metric on validation set to perform early stopping, {'val_acc', 'val_loss'}"
    )
    parser.add_argument('--mlp-batch-size',
                        type=int,
                        default=128,
                        metavar='N',
                        help='input batch size for training')
    parser.add_argument('--mlp-save-prefix',
                        type=str,
                        default='../data/rbm_',
                        metavar='PREFIX',
                        help='prefix to save MLP predictions and targets')

    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    if len(args.mlp_lrm) == 1:
        args.mlp_lrm *= 2

    # prepare data (load + scale + split)
    print("\nPreparing data ...\n\n")
    X, y = load_mnist(mode='train', path=args.data_path)
    X /= 255.
    RNG(seed=42).shuffle(X)
    RNG(seed=42).shuffle(y)
    n_train = min(len(X), args.n_train)
    n_val = min(len(X), args.n_val)
    X_train = X[:n_train]
    y_train = y[:n_train]
    X_val = X[-n_val:]
    y_val = y[-n_val:]

    # train and save the RBM model
    rbm = make_rbm(X_train, X_val, args)

    # load test data
    X_test, y_test = load_mnist(mode='test', path=args.data_path)
    X_test /= 255.

    # discriminative fine-tuning: initialize MLP with
    # learned weights, add FC layer and train using backprop
    print("\nDiscriminative fine-tuning ...\n\n")

    W, hb = None, None
    if not args.mlp_no_init:
        weights = rbm.get_tf_params(scope='weights')
        W = weights['W']
        hb = weights['hb']

    make_mlp((X_train, y_train), (X_val, y_val), (X_test, y_test), (W, hb),
             args)
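For reference, initializing visible biases to the logit of each feature's mean activation (the default behaviour described by --vb-init above) is the standard trick from Hinton's practical guide to training RBMs; a sketch, assuming the means are clipped away from 0 and 1 to avoid infinities:

import numpy as np

def logit_vb_init(X, eps=1e-7):
    # vb_j = log(p_j / (1 - p_j)) with p_j the mean of visible unit j;
    # a sketch of the documented default, not the project's exact code
    p = X.mean(axis=0).clip(eps, 1. - eps)
    return np.log(p / (1. - p))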
Example #7
def main():
    # training settings
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # general
    parser.add_argument(
        '--gpu',
        type=str,
        default='0',
        metavar='ID',
        help="ID of the GPU to train on (or '' to train on CPU)")

    # data
    parser.add_argument('--n-train',
                        type=int,
                        default=49000,
                        metavar='N',
                        help='number of training examples')
    parser.add_argument('--n-val',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='number of validation examples')
    parser.add_argument('--data-path',
                        type=str,
                        default='../data/',
                        metavar='PATH',
                        help='directory for storing augmented data etc.')
    parser.add_argument('--no-aug',
                        action='store_true',
                        help="if enabled, don't augment data")

    # small RBMs related
    parser.add_argument('--small-lr',
                        type=float,
                        default=1e-3,
                        metavar='LR',
                        nargs='+',
                        help='learning rate or sequence of such (per epoch)')
    parser.add_argument('--small-epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--small-batch-size',
                        type=int,
                        default=48,
                        metavar='B',
                        help='input batch size for training')
    parser.add_argument('--small-l2',
                        type=float,
                        default=1e-3,
                        metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument('--small-sparsity-target',
                        type=float,
                        default=0.1,
                        metavar='T',
                        help='desired probability of hidden activation')
    parser.add_argument('--small-sparsity-cost',
                        type=float,
                        default=1e-3,
                        metavar='C',
                        help='controls the amount of sparsity penalty')
    parser.add_argument('--small-random-seed',
                        type=int,
                        default=9000,
                        metavar='N',
                        help="random seeds for models training")
    parser.add_argument(
        '--small-dirpath-prefix',
        type=str,
        default='../models/rbm_cifar_small_',
        metavar='PREFIX',
        help='directory path prefix to save RBMs trained on patches')

    # M-RBM related
    parser.add_argument(
        '--increase-n-gibbs-steps-every',
        type=int,
        default=16,
        metavar='I',
        help=
        'increase number of Gibbs steps every specified number of epochs for M-RBM'
    )

    # common for RBMs and DBM
    parser.add_argument('--n-gibbs-steps',
                        type=int,
                        default=(1, 1, 1),
                        metavar='N',
                        nargs='+',
                        help='(initial) number of Gibbs steps for CD/PCD')
    parser.add_argument('--lr',
                        type=float,
                        default=(5e-4, 5e-5, 4e-5),
                        metavar='LR',
                        nargs='+',
                        help='(initial) learning rates')
    parser.add_argument('--epochs',
                        type=int,
                        default=(64, 33, 100),
                        metavar='N',
                        nargs='+',
                        help='number of epochs to train')
    parser.add_argument('--batch-size', type=int, default=(100, 100, 100), metavar='B', nargs='+',
                        help='input batch size for training, `--n-train` and `--n-val`' + \
                             ' must be divisible by this number (for DBM)')
    parser.add_argument('--l2',
                        type=float,
                        default=(1e-3, 0.005, 0.),
                        metavar='L2',
                        nargs='+',
                        help='L2 weight decay coefficients')
    parser.add_argument('--random-seed',
                        type=int,
                        default=(1111, 2222, 3333),
                        metavar='N',
                        nargs='+',
                        help='random seeds for training the models')

    # save dirpaths
    parser.add_argument('--grbm-dirpath',
                        type=str,
                        default='../models/grbm_cifar/',
                        metavar='DIRPATH',
                        help='directory path to save Gaussian RBM')
    parser.add_argument('--mrbm-dirpath',
                        type=str,
                        default='../models/mrbm_cifar/',
                        metavar='DIRPATH',
                        help='directory path to save Multinomial RBM')
    parser.add_argument('--dbm-dirpath',
                        type=str,
                        default='../models/dbm_cifar/',
                        metavar='DIRPATH',
                        help='directory path to save DBM')

    # DBM related
    parser.add_argument('--n-particles',
                        type=int,
                        default=100,
                        metavar='M',
                        help='number of persistent Markov chains')
    parser.add_argument(
        '--max-mf-updates',
        type=int,
        default=50,
        metavar='N',
        help='maximum number of mean-field updates per weight update')
    parser.add_argument('--mf-tol',
                        type=float,
                        default=1e-11,
                        metavar='TOL',
                        help='mean-field tolerance')
    parser.add_argument('--max-norm',
                        type=float,
                        default=4.,
                        metavar='C',
                        help='maximum norm constraint')
    parser.add_argument('--sparsity-target',
                        type=float,
                        default=(0.2, 0.2),
                        metavar='T',
                        nargs='+',
                        help='desired probability of hidden activation')
    parser.add_argument('--sparsity-cost',
                        type=float,
                        default=(1e-4, 1e-3),
                        metavar='C',
                        nargs='+',
                        help='controls the amount of sparsity penalty')
    parser.add_argument('--sparsity-damping',
                        type=float,
                        default=0.9,
                        metavar='D',
                        help='decay rate for hidden activations probs')

    # MLP related
    parser.add_argument('--mlp-no-init',
                        action='store_true',
                        help='if enabled, use random initialization')
    parser.add_argument('--mlp-l2',
                        type=float,
                        default=1e-4,
                        metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument('--mlp-lrm',
                        type=float,
                        default=(0.01, 1.),
                        metavar='LRM',
                        nargs='+',
                        help='learning rate multipliers for the base learning rate of 1e-3')
    parser.add_argument('--mlp-epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train')
    parser.add_argument(
        '--mlp-val-metric',
        type=str,
        default='val_acc',
        metavar='S',
        help=
        "metric on validation set to perform early stopping, {'val_acc', 'val_loss'}"
    )
    parser.add_argument('--mlp-batch-size',
                        type=int,
                        default=128,
                        metavar='N',
                        help='input batch size for training')
    parser.add_argument('--mlp-dropout',
                        type=float,
                        default=0.7,
                        metavar='P',
                        help='probability of visible units being set to zero')
    parser.add_argument('--mlp-save-prefix',
                        type=str,
                        default='../data/grbm_',
                        metavar='PREFIX',
                        help='prefix to save MLP predictions and targets')

    # parse and check params
    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    for x, m in (
        (args.n_gibbs_steps, 3),
        (args.lr, 3),
        (args.epochs, 3),
        (args.batch_size, 3),
        (args.l2, 3),
        (args.random_seed, 3),
    ):
        if len(x) == 1:
            x *= m

    # prepare data (load + scale + split)
    print("\nPreparing data ...")
    X, y = load_cifar10(mode='train', path=args.data_path)
    X = X.astype(np.float32)
    X /= 255.
    RNG(seed=42).shuffle(X)
    RNG(seed=42).shuffle(y)
    n_train = min(len(X), args.n_train)
    n_val = min(len(X), args.n_val)
    X_train = X[:n_train]
    X_val = X[-n_val:]
    y_train = y[:n_train]
    y_val = y[-n_val:]

    if not args.no_aug:
        # augment data
        X_aug, y_train = make_augmentation(X_train, y_train, n_train, args)

        # convert + scale augmented data again
        X_train = X_aug.astype(np.float32)
        X_train /= 255.
        print("Augmented shape: {0}".format(X_train.shape))
        print("Augmented range: {0}".format((X_train.min(), X_train.max())))

    # center and normalize training data
    X_mean = X_train.mean(axis=0)
    X_std = X_train.std(axis=0)

    if not args.no_aug:
        mean_path = os.path.join(args.data_path, 'X_aug_mean.npy')
        std_path = os.path.join(args.data_path, 'X_aug_std.npy')
        if not os.path.isfile(mean_path):
            np.save(mean_path, X_mean)
        if not os.path.isfile(std_path):
            np.save(std_path, X_std)

    X_train -= X_mean
    X_train /= X_std
    X_val -= X_mean
    X_val /= X_std
    print("Augmented mean: ({0:.3f}, ...); std: ({1:.3f}, ...)".format(
        X_train.mean(axis=0)[0],
        X_train.std(axis=0)[0]))
    print("Augmented range: ({0:.3f}, {1:.3f})\n\n".format(
        X_train.min(), X_train.max()))

    # train 26 small Gaussian RBMs on patches
    small_rbms = None
    if not os.path.isdir(args.grbm_dirpath):
        small_rbms = make_small_rbms((X_train, X_val), args)

    # assemble large weight matrix and biases
    # and pre-train large Gaussian RBM (G-RBM)
    grbm = make_grbm((X_train, X_val), small_rbms, args)

    # extract features Q = p_{G-RBM}(h|v=X)
    print("\nExtracting features from G-RBM ...\n\n")
    Q_train, Q_val = None, None
    if not os.path.isdir(args.mrbm_dirpath) or not os.path.isdir(
            args.dbm_dirpath):
        Q_train_path = os.path.join(args.data_path, 'Q_train_cifar.npy')
        Q_train = make_rbm_transform(grbm,
                                     X_train,
                                     Q_train_path,
                                     np_dtype=np.float16)
    if not os.path.isdir(args.mrbm_dirpath):
        Q_val_path = os.path.join(args.data_path, 'Q_val_cifar.npy')
        Q_val = make_rbm_transform(grbm, X_val, Q_val_path)

    # pre-train Multinomial RBM (M-RBM)
    mrbm = make_mrbm((Q_train, Q_val), args)

    # extract features G = p_{M-RBM}(h|v=Q)
    print("\nExtracting features from M-RBM ...\n\n")
    Q, G = None, None
    if not os.path.isdir(args.dbm_dirpath):
        Q = Q_train[:args.n_particles]
        G_path = os.path.join(args.data_path, 'G_train_cifar.npy')
        G = make_rbm_transform(mrbm, Q, G_path)

    # jointly train DBM
    dbm = make_dbm((X_train, X_val), (grbm, mrbm), (Q, G), args)

    # load test data
    X_test, y_test = load_cifar10(mode='test', path=args.data_path)
    X_test = X_test.astype(np.float32)  # same dtype handling as the training data
    X_test /= 255.
    X_test -= X_mean
    X_test /= X_std

    # G-RBM discriminative fine-tuning:
    # initialize MLP with learned weights,
    # add FC layer and train using backprop
    print("\nG-RBM Discriminative fine-tuning ...\n\n")

    W, hb = None, None
    if not args.mlp_no_init:
        weights = grbm.get_tf_params(scope='weights')
        W = weights['W']
        hb = weights['hb']

    make_mlp((X_train, y_train), (X_val, y_val), (X_test, y_test), (W, hb),
             args)
Example #8
def main():
    # training settings
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # general/data
    parser.add_argument(
        '--gpu',
        type=str,
        default='0',
        metavar='ID',
        help="ID of the GPU to train on (or '' to train on CPU)")
    parser.add_argument('--n-train',
                        type=int,
                        default=59000,
                        metavar='N',
                        help='number of training examples')
    parser.add_argument('--n-val',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='number of validation examples')

    # RBM #2 related
    parser.add_argument(
        '--increase-n-gibbs-steps-every',
        type=int,
        default=20,
        metavar='I',
        help=
        'increase number of Gibbs steps every specified number of epochs for RBM #2'
    )

    # common for RBMs and DBM
    parser.add_argument('--n-hiddens',
                        type=int,
                        default=(512, 1024),
                        metavar='N',
                        nargs='+',
                        help='numbers of hidden units')
    parser.add_argument('--n-gibbs-steps',
                        type=int,
                        default=(1, 1, 1),
                        metavar='N',
                        nargs='+',
                        help='(initial) number of Gibbs steps for CD/PCD')
    parser.add_argument('--lr',
                        type=float,
                        default=(0.05, 0.01, 2e-3),
                        metavar='LR',
                        nargs='+',
                        help='(initial) learning rates')
    parser.add_argument('--epochs',
                        type=int,
                        default=(64, 120, 500),
                        metavar='N',
                        nargs='+',
                        help='number of epochs to train')
    parser.add_argument('--batch-size', type=int, default=(48, 48, 100), metavar='B', nargs='+',
                        help='input batch size for training, `--n-train` and `--n-val`' + \
                             ' must be divisible by this number (for DBM)')
    parser.add_argument('--l2',
                        type=float,
                        default=(1e-3, 2e-4, 1e-7),
                        metavar='L2',
                        nargs='+',
                        help='L2 weight decay coefficients')
    parser.add_argument('--random-seed',
                        type=int,
                        default=(1337, 1111, 2222),
                        metavar='N',
                        nargs='+',
                        help='random seeds for training the models')

    # save dirpaths
    parser.add_argument('--rbm1-dirpath',
                        type=str,
                        default='../models/dbm_mnist_rbm1/',
                        metavar='DIRPATH',
                        help='directory path to save RBM #1')
    parser.add_argument('--rbm2-dirpath',
                        type=str,
                        default='../models/dbm_mnist_rbm2/',
                        metavar='DIRPATH',
                        help='directory path to save RBM #2')
    parser.add_argument('--dbm-dirpath',
                        type=str,
                        default='../models/dbm_mnist/',
                        metavar='DIRPATH',
                        help='directory path to save DBM')

    # DBM related
    parser.add_argument('--n-particles',
                        type=int,
                        default=100,
                        metavar='M',
                        help='number of persistent Markov chains')
    parser.add_argument(
        '--max-mf-updates',
        type=int,
        default=50,
        metavar='N',
        help='maximum number of mean-field updates per weight update')
    parser.add_argument('--mf-tol',
                        type=float,
                        default=1e-7,
                        metavar='TOL',
                        help='mean-field tolerance')
    parser.add_argument('--max-norm',
                        type=float,
                        default=6.,
                        metavar='C',
                        help='maximum norm constraint')
    parser.add_argument('--sparsity-target',
                        type=float,
                        default=(0.2, 0.1),
                        metavar='T',
                        nargs='+',
                        help='desired probability of hidden activation')
    parser.add_argument('--sparsity-cost',
                        type=float,
                        default=(1e-4, 5e-5),
                        metavar='C',
                        nargs='+',
                        help='controls the amount of sparsity penalty')
    parser.add_argument('--sparsity-damping',
                        type=float,
                        default=0.9,
                        metavar='D',
                        help='decay rate for hidden activations probs')

    # MLP related
    parser.add_argument('--mlp-no-init',
                        action='store_true',
                        help='if enabled, use random initialization')
    parser.add_argument('--mlp-l2',
                        type=float,
                        default=1e-5,
                        metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument('--mlp-lrm',
                        type=float,
                        default=(0.01, 0.1, 1.),
                        metavar='LRM',
                        nargs='+',
                        help='learning rate multipliers for the base learning rate of 1e-3')
    parser.add_argument('--mlp-epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train')
    parser.add_argument(
        '--mlp-val-metric',
        type=str,
        default='val_acc',
        metavar='S',
        help=
        "metric on validation set to perform early stopping, {'val_acc', 'val_loss'}"
    )
    parser.add_argument('--mlp-batch-size',
                        type=int,
                        default=128,
                        metavar='N',
                        help='input batch size for training')
    parser.add_argument('--mlp-save-prefix',
                        type=str,
                        default='../data/dbm_',
                        metavar='PREFIX',
                        help='prefix to save MLP predictions and targets')

    # parse and check params
    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    for x, m in (
        (args.n_gibbs_steps, 3),
        (args.lr, 3),
        (args.epochs, 3),
        (args.batch_size, 3),
        (args.l2, 3),
        (args.random_seed, 3),
        (args.sparsity_target, 2),
        (args.sparsity_cost, 2),
        (args.mlp_lrm, 3),
    ):
        if len(x) == 1:
            x *= m

    # prepare data (load + scale + split)
    print "\nPreparing data ...\n\n"
    X, y = load_mnist(mode='train', path='../data/')
    X /= 255.
    RNG(seed=42).shuffle(X)
    RNG(seed=42).shuffle(y)
    n_train = min(len(X), args.n_train)
    n_val = min(len(X), args.n_val)
    X_train = X[:n_train]
    y_train = y[:n_train]
    X_val = X[-n_val:]
    y_val = y[-n_val:]
    X = np.concatenate((X_train, X_val))

    # pre-train RBM #1
    rbm1 = make_rbm1(X, args)

    # freeze RBM #1 and extract features Q = p_{RBM_1}(h|v=X)
    Q = None
    if not os.path.isdir(args.rbm2_dirpath) or not os.path.isdir(
            args.dbm_dirpath):
        print "\nExtracting features from RBM #1 ..."
        Q = rbm1.transform(X)
        print "\n"

    # pre-train RBM #2
    rbm2 = make_rbm2(Q, args)

    # freeze RBM #2 and extract features G = p_{RBM_2}(h|v=Q)
    G = None
    if not os.path.isdir(args.dbm_dirpath):
        print "\nExtracting features from RBM #2 ..."
        G = rbm2.transform(Q)
        print "\n"

    # jointly train DBM
    dbm = make_dbm((X_train, X_val), (rbm1, rbm2), (Q, G), args)

    # load test data
    X_test, y_test = load_mnist(mode='test', path='../data/')
    X_test /= 255.

    # discriminative fine-tuning: initialize MLP with
    # learned weights, add FC layer and train using backprop
    print "\nDiscriminative fine-tuning ...\n\n"

    W, hb = None, None
    W2, hb2 = None, None
    if not args.mlp_no_init:
        weights = dbm.get_tf_params(scope='weights')
        W = weights['W']
        hb = weights['hb']
        W2 = weights['W_1']
        hb2 = weights['hb_1']

    make_mlp((X_train, y_train), (X_val, y_val), (X_test, y_test), (W, hb),
             (W2, hb2), args)
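A recurring pattern in all of these main() functions is shuffling X and y with two fresh RNG(seed=42) instances: generators started from the same seed draw the same random numbers, so both arrays receive the identical permutation and features stay paired with their labels. A self-contained check, again assuming RNG behaves like numpy.random.RandomState:

import numpy as np

RNG = np.random.RandomState  # assumed stand-in, as in the examples above

X = np.arange(10).reshape(10, 1)  # stand-in feature rows
y = np.arange(10)                 # matching labels
RNG(seed=42).shuffle(X)  # fresh generator, shuffles along axis 0
RNG(seed=42).shuffle(y)  # same seed -> identical permutation
assert np.array_equal(X.ravel(), y)  # rows and labels still aligned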