Example #1
def synth_grad(tparams, prefix, activation, labels_one_hot):
    '''
	Synthetic gradient estimation using a linear model
	'''

    return T.dot(activation, tparams[_concat(prefix, 'W')]) + T.dot(
        labels_one_hot, tparams[_concat(prefix, 'C')]) + tparams[_concat(
            prefix, 'b')]
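For orientation, the linear estimator above is just an affine map of the activation and the one-hot labels. A minimal NumPy sketch of the same computation, with assumed shapes and zero-initialized parameters (mirroring the zero_init branch of param_init_sgmod in Example #6):

import numpy as np

batch, units, n_classes = 64, 150, 10                  # assumed sizes
activation = np.random.randn(batch, units).astype('float32')
labels_one_hot = np.eye(n_classes, dtype='float32')[
    np.random.randint(0, n_classes, batch)]

W = np.zeros((units, units), dtype='float32')          # tparams[prefix_W]
C = np.zeros((n_classes, units), dtype='float32')      # tparams[prefix_C]
b = np.zeros((units,), dtype='float32')                # tparams[prefix_b]

# synthetic gradient estimate, one row per example
grad_est = activation.dot(W) + labels_one_hot.dot(C) + b
assert grad_est.shape == (batch, units)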
Example #2
def param_init_fflayer(params, prefix, nin, nout):
    '''
	Initializes weights for a feedforward layer
	'''
    params[_concat(prefix, 'W')] = init_weights(nin, nout, type_init='ortho')
    params[_concat(prefix, 'b')] = np.zeros((nout, )).astype('float32')

    return params
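All of these snippets index their parameter dictionaries with keys built by _concat. Its definition is not shown on this page; in repositories of this kind it is typically a one-line helper along the following lines (an assumption, included only so the examples read standalone):

def _concat(prefix, name):
    # e.g. _concat('ff_enc', 'W') -> 'ff_enc_W'
    return '%s_%s' % (prefix, name)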
Example #3
def param_init_sgmod(params, prefix, units, zero_init=True):
	'''
	Initialization for synthetic gradient subnetwork
	'''
	global args
	
	# conditioned on the whole image, on the activation produced from the encoder input, and on the backpropagated gradients for the latent samples.
	inp_list = [14*28, 14*28, units, units, units]
	inp_size = 0
	for i in range(5):
		inp_size += inp_list[i]

	if not zero_init:
		if args.sg_type == 'lin':
			params[_concat(prefix, 'W')] = init_weights(inp_size, units, type_init='ortho')
			params[_concat(prefix, 'b')] = np.zeros((units,)).astype('float32')

	else:
		if args.sg_type == 'lin' or args.sg_type == 'lin_deep':
			params[_concat(prefix, 'W')] = np.zeros((inp_size, units)).astype('float32')
			params[_concat(prefix, 'b')] = np.zeros((units,)).astype('float32')

		if args.sg_type == 'deep' or args.sg_type == 'lin_deep':
			params = param_init_fflayer(params, _concat(prefix, 'I'), inp_size, 1024, batchnorm=True)
			params = param_init_fflayer(params, _concat(prefix, 'H'), 1024, 1024, batchnorm=True)
			if args.bn_type == 0:
				params = param_init_fflayer(params, _concat(prefix, 'o'), 1024, units, zero_init=True, batchnorm=True)
			else:
				params = param_init_fflayer(params, _concat(prefix, 'o'), 1024, units, zero_init=True, batchnorm=False)

	return params
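As a quick check of the conditioning described in the comment above, with a hypothetical units = 50 the input size comes to 934 features: two image halves of 14*28 pixels plus three units-sized vectors.

units = 50                                    # assumed latent size
inp_list = [14 * 28, 14 * 28, units, units, units]
inp_size = sum(inp_list)                      # 392 + 392 + 50 + 50 + 50 = 934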
Example #4
def param_init_fflayer(params,
                       prefix,
                       nin,
                       nout,
                       zero_init=False,
                       batchnorm=False,
                       skip_running_vars=False):
    '''
	Initializes weights for a feedforward layer
	'''
    global args
    if zero_init:
        params[_concat(prefix, 'W')] = np.zeros((nin, nout)).astype('float32')
    else:
        params[_concat(prefix, 'W')] = init_weights(nin,
                                                    nout,
                                                    type_init='ortho')

    params[_concat(prefix, 'b')] = np.zeros((nout, )).astype('float32')

    if batchnorm:
        if args.bn_type == 0:
            dim = nin
        else:
            dim = nout
        params[_concat(prefix, 'g')] = np.ones((dim, ), dtype=np.float32)
        params[_concat(prefix, 'be')] = np.zeros((dim, )).astype('float32')

        # it is not necessary for deep synthetic subnetworks to track running averages, as they are not used at test time
        if not skip_running_vars:
            params[_concat(prefix, 'rm')] = np.zeros(
                (1, dim)).astype('float32')
            params[_concat(prefix, 'rv')] = np.ones((1, dim), dtype=np.float32)

    return params
Example #5
def _compute_unrolled_model(self, input, target, eta, network_optimizer):
    # loss on train data
    loss = self.model._loss(input, target)
    # w
    theta = _concat(self.model.parameters()).data
    try:
        moment = _concat(network_optimizer.state[v]['momentum_buffer']
                         for v in self.model.parameters()).mul_(
                             self.network_momentum)
    except:
        moment = torch.zeros_like(theta)
    # w_grad + weight_decay * w
    dtheta = _concat(torch.autograd.grad(
        loss,
        self.model.parameters())).data + self.network_weight_decay * theta
    # eta: learning rate
    unrolled_model = self._construct_model_from_theta(
        theta.sub(eta, moment + dtheta))
    return unrolled_model
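Read literally, the snippet builds the one-step-unrolled weights w' = w - eta * (mu * m + grad_w L_train(w) + lambda * w), where mu is the network momentum, m the momentum buffer, and lambda the weight decay. A NumPy sketch of that update, with hyperparameter values assumed for illustration:

import numpy as np

eta, weight_decay = 0.025, 3e-4                      # assumed values
theta = np.random.randn(1000).astype('float32')      # flattened weights w
moment = np.zeros_like(theta)                        # mu * momentum buffer (zero on the first step)
grad = np.random.randn(1000).astype('float32')       # grad_w L_train(w)

dtheta = grad + weight_decay * theta                 # w_grad + weight_decay * w
unrolled_theta = theta - eta * (moment + dtheta)     # what theta.sub(eta, moment + dtheta) computes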
Example #6
def param_init_sgmod(params, prefix, units, zero_init=True):
    '''
	Initializes a linear regression based model for estimating gradients, conditioned on the class labels
	'''
    if not zero_init:
        params[_concat(prefix, 'W')] = init_weights(units,
                                                    units,
                                                    type_init='ortho')
        params[_concat(prefix, 'C')] = init_weights(10,
                                                    units,
                                                    type_init='ortho')

    else:
        params[_concat(prefix, 'W')] = np.zeros(
            (units, units)).astype('float32')
        params[_concat(prefix, 'C')] = np.zeros((10, units)).astype('float32')

    params[_concat(prefix, 'b')] = np.zeros((units, )).astype('float32')

    return params
Example #7
def fflayer(tparams, state_below, prefix, nonlin='tanh'):
    '''
	A feedforward layer
	'''
    if nonlin == None:
        return T.dot(state_below, tparams[_concat(
            prefix, 'W')]) + tparams[_concat(prefix, 'b')]
    elif nonlin == 'tanh':
        return T.tanh(
            T.dot(state_below, tparams[_concat(prefix, 'W')]) +
            tparams[_concat(prefix, 'b')])
    elif nonlin == 'sigmoid':
        return T.nnet.nnet.sigmoid(
            T.dot(state_below, tparams[_concat(prefix, 'W')]) +
            tparams[_concat(prefix, 'b')])
    elif nonlin == 'softplus':
        return T.nnet.nnet.softplus(
            T.dot(state_below, tparams[_concat(prefix, 'W')]) +
            tparams[_concat(prefix, 'b')])
    elif nonlin == 'relu':
        return T.nnet.nnet.relu(
            T.dot(state_below, tparams[_concat(prefix, 'W')]) +
            tparams[_concat(prefix, 'b')])
Example #8
    def _hessian_vector_product(self, vector, input, target, r=1e-2):
        R = r / _concat(vector).norm()
        for p, v in zip(self.model.parameters(), vector):
            p.data.add_(R, v)
        loss = self.model._loss(input, target)
        grads_p = torch.autograd.grad(loss, self.model.arch_parameters())

        for p, v in zip(self.model.parameters(), vector):
            p.data.sub_(2 * R, v)
        loss = self.model._loss(input, target)
        grads_n = torch.autograd.grad(loss, self.model.arch_parameters())

        for p, v in zip(self.model.parameters(), vector):
            p.data.add_(R, v)

        return [(x - y).div_(2 * R) for x, y in zip(grads_p, grads_n)]
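The method above is the standard central-difference estimate of a Hessian-vector product: it approximates d/dw [grad_alpha L(w, alpha)] . v by (grad_alpha L(w + R v) - grad_alpha L(w - R v)) / (2 R) with R = r / ||v||, restoring the weights afterwards. A self-contained NumPy sketch of the same scheme on a toy loss (the helper name and loss are made up for illustration):

import numpy as np

def hvp_finite_diff(grad_alpha, w, v, r=1e-2):
    # central difference of grad_alpha around w along direction v
    R = r / np.linalg.norm(v)
    grads_p = grad_alpha(w + R * v)
    grads_n = grad_alpha(w - R * v)
    return (grads_p - grads_n) / (2 * R)

# toy loss L(w, alpha) = alpha * sum(w**2), so grad_alpha L = sum(w**2)
grad_alpha = lambda w: np.array([np.sum(w ** 2)])
w = np.arange(4, dtype='float64')
v = np.ones(4)
print(hvp_finite_diff(grad_alpha, w, v))             # ~[12.0], i.e. 2 * w.dot(v)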
Example #9
def synth_grad(tparams, prefix, inp, mode='Train'):
	'''
	Synthetic gradients
	'''
	global args
	if args.sg_type == 'lin':
		return T.dot(inp, tparams[_concat(prefix, 'W')]) + tparams[_concat(prefix, 'b')]
	
	elif args.sg_type == 'deep' or args.sg_type == 'lin_deep':
		outi = fflayer(tparams, inp, _concat(prefix, 'I'), nonlin='relu', batchnorm='train', dropout=None)
		outh = fflayer(tparams, outi, _concat(prefix,'H'), nonlin='relu', batchnorm='train', dropout=None)
		
		# depending on the bn type, batch normalization is or is not applied in the output layer
		if args.bn_type == 0:
			bn_last = 'train'
		else:
			bn_last = None
		
		if args.sg_type == 'deep':
			return fflayer(tparams, outh + outi, _concat(prefix, 'o'), batchnorm=bn_last, nonlin=None)
		elif args.sg_type == 'lin_deep':
			return T.dot(inp, tparams[_concat(prefix, 'W')]) + tparams[_concat(prefix, 'b')] + fflayer(tparams, outh + outi, _concat(prefix, 'o'), batchnorm=bn_last, nonlin=None)
Example #10
def param_init_fflayer(params, prefix, nin, nout, zero_init=False, batchnorm=False):
	'''
	Initializes weights for a feedforward layer
	'''
	global args
	if zero_init:
		params[_concat(prefix, 'W')] = np.zeros((nin, nout)).astype('float32')
	else:
		params[_concat(prefix, 'W')] = init_weights(nin, nout, type_init='ortho')
	
	params[_concat(prefix, 'b')] = np.zeros((nout,)).astype('float32')
	
	if batchnorm:
		if args.bn_type == 0:
			dim = nin
		else:
			dim = nout
		params[_concat(prefix, 'g')] = np.ones((dim,), dtype=np.float32)
		params[_concat(prefix, 'be')] = np.zeros((dim,)).astype('float32')
		params[_concat(prefix, 'rm')] = np.zeros((1, dim)).astype('float32')
		params[_concat(prefix, 'rv')] = np.ones((1, dim), dtype=np.float32)
	
	return params
Example #11
def fflayer(tparams, state_below, prefix, nonlin='tanh', batchnorm=None, dropout=None):
	'''
	A feedforward layer
	Note: None means dropout/batch normalization is not used.
	Use 'train' or 'test' options.
	'''
	global srng, args

	# choose what batch normalization acts on: the layer input (bn_type 0) or the pre-activation
	if args.bn_type == 0:
		inp = state_below
	else:
		inp = T.dot(state_below, tparams[_concat(prefix, 'W')]) + tparams[_concat(prefix, 'b')]

	if batchnorm == 'train':
		axes = (0,)
		mean = inp.mean(axes, keepdims=True)
		var = inp.var(axes, keepdims=True)
		invstd = T.inv(T.sqrt(var + 1e-4))
		inp = (inp - mean) * tparams[_concat(prefix, 'g')] * invstd + tparams[_concat(prefix, 'be')]
		
		running_average_factor = 0.1	
		m = T.cast(T.prod(inp.shape) / T.prod(mean.shape), 'float32')
		tparams[_concat(prefix, 'rm')] = tparams[_concat(prefix, 'rm')] * (1 - running_average_factor) + mean * running_average_factor
		tparams[_concat(prefix, 'rv')] = tparams[_concat(prefix, 'rv')] * (1 - running_average_factor) + (m / (m - 1)) * var * running_average_factor
		
	elif batchnorm == 'test':
		inp = (inp - tparams[_concat(prefix, 'rm')].flatten()) * tparams[_concat(prefix, 'g')] / T.sqrt(tparams[_concat(prefix, 'rv')].flatten() + 1e-4) + tparams[_concat(prefix, 'be')]
	
	if args.bn_type == 0:
		preact = T.dot(inp, tparams[_concat(prefix, 'W')]) + tparams[_concat(prefix, 'b')]
	else:
		preact = inp

	# dropout is carried out with fixed probability
	if dropout == 'train':
		dropmask = srng.binomial(n=1, p=1. - args.dropout_prob, size=preact.shape, dtype=theano.config.floatX)
		preact *= dropmask
	
	elif dropout == 'test':
		preact *= 1. - args.dropout_prob

	if nonlin == None:
		return preact
	elif nonlin == 'tanh':
		return T.tanh(preact)
	elif nonlin == 'sigmoid':
		return T.nnet.nnet.sigmoid(preact)
	elif nonlin == 'softplus':
		return T.nnet.nnet.softplus(preact)
	elif nonlin == 'relu':
		return T.nnet.nnet.relu(preact)
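For reference, the train-mode branch above normalizes each feature with the current batch statistics and keeps exponential running averages for use at test time. A NumPy sketch of the same arithmetic on an assumed batch of pre-activations:

import numpy as np

x = np.random.randn(64, 100).astype('float32')        # assumed (batch, features) input
g = np.ones(100, dtype='float32')                      # gamma,   tparams 'g'
be = np.zeros(100, dtype='float32')                    # beta,    tparams 'be'
rm = np.zeros((1, 100), dtype='float32')               # running mean,     'rm'
rv = np.ones((1, 100), dtype='float32')                # running variance, 'rv'

mean = x.mean(axis=0, keepdims=True)
var = x.var(axis=0, keepdims=True)
out = (x - mean) * g / np.sqrt(var + 1e-4) + be        # train-time normalization

factor = 0.1                                           # running_average_factor
m = float(x.size) / mean.size                          # samples contributing to each feature statistic
rm = rm * (1 - factor) + mean * factor
rv = rv * (1 - factor) + (m / (m - 1)) * var * factor  # unbiased variance correction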
Example #12
# split images
trp = [split_img(img) for img in trc]
tep = [split_img(img) for img in tec]

print "Initializing parameters"
# parameter initializations
ff_e = 'ff_enc'
ff_d = 'ff_dec'
sg = 'sg'
latent_dim = 50


params = OrderedDict()

# encoder
params = param_init_fflayer(params, _concat(ff_e, 'i'), 14*28, 200, batchnorm=True)
params = param_init_fflayer(params, _concat(ff_e, 'h'), 200, 100, batchnorm=True)

# latent
if args.bn_type == 0:
	params = param_init_fflayer(params, _concat(ff_e, 'bern'), 100, latent_dim, batchnorm=True)
else:
	params = param_init_fflayer(params, _concat(ff_e, 'bern'), 100, latent_dim, batchnorm=False)

# synthetic gradient module for the last encoder layer
params = param_init_sgmod(params, _concat(sg, 'r'), latent_dim)

# loss prediction neural network, conditioned on input and output (in this case the whole image). Acts as the baseline
params = param_init_fflayer(params, 'loss_pred', 28*28, 1)

# decoder parameters
Example #13
# split images
trp = [split_img(img) for img in trc]
tep = [split_img(img) for img in tec]

print "Initializing parameters"
# parameter initializations
ff_e = 'ff_enc'
ff_d = 'ff_dec'
latent_dim = 1000

params = OrderedDict()

# encoder
params = param_init_fflayer(params,
                            _concat(ff_e, 'i'),
                            14 * 28,
                            200,
                            batchnorm=True)
params = param_init_fflayer(params,
                            _concat(ff_e, 'h'),
                            200,
                            100,
                            batchnorm=True)

# latent distribution parameters
if args.latent_type == 'cont':
    if args.bn_type == 0:
        params = param_init_fflayer(params,
                                    _concat(ff_e, 'mu'),
                                    100,
Example #14
tei = np.asarray(
    [img.flatten() for lbl, img in read(dataset='testing', path='MNIST/')],
    dtype=np.float32)
tel = np.asarray([lbl for lbl, img in read(dataset='testing', path='MNIST/')],
                 dtype=np.int64)

print "Initializing parameters"

ff = 'ff'
sg = 'sg'

# no path to saved weights given on the command line
if len(sys.argv) < 3:
    params = OrderedDict()

    params = param_init_fflayer(params, _concat(ff, '1'), 28 * 28, 300)
    params = param_init_fflayer(params, _concat(ff, '2'), 300, 150)

    if train_rou == 'synthetic_gradients':
        params = param_init_sgmod(params, _concat(sg, '1'), 300)
        params = param_init_sgmod(params, _concat(sg, '2'), 150)

    params = param_init_fflayer(params, _concat(ff, 'o'), 150, 10)

else:
    params = np.load(sys.argv[2])

tparams = OrderedDict()
for key, val in params.iteritems():
    tparams[key] = theano.shared(val, name=key)