import numpy as np
import cgt
from cgt import nn

# `hybrid_network`, `get_surrogate_func`, and the `dist` distributions module
# are assumed to be provided elsewhere in this codebase.


def make_funcs(config, dbg_out=None):
    # Use None instead of a mutable default dict, which would leak debug
    # state across calls.
    if dbg_out is None: dbg_out = {}
    # Build the stochastic ("hybrid") network; hybrid_network populates
    # dbg_out with intermediate nodes as a side effect.
    net_in, net_out = hybrid_network(config['num_inputs'], config['num_outputs'],
                                     config['num_units'], config['num_sto'],
                                     dbg_out=dbg_out)
    if not config['dbg_out_full']: dbg_out = {}
    # def f_sample(_inputs, num_samples=1, flatten=False):
    #     _mean, _var = f_step(_inputs)
    #     _samples = []
    #     for _m, _v in zip(_mean, _var):
    #         _s = np.random.multivariate_normal(_m, np.diag(np.sqrt(_v)), num_samples)
    #         if flatten: _samples.extend(_s)
    #         else: _samples.append(_s)
    #     return np.array(_samples)
    Y_gt = cgt.matrix("Y")  # ground-truth targets
    # Per-sample precision matrices for the Gaussian log-likelihood.
    Y_prec = cgt.tensor3('V', fixed_shape=(None, config['num_inputs'],
                                           config['num_inputs']))
    params = nn.get_parameters(net_out)
    size_batch, size_out = net_out.shape
    inputs, outputs = [net_in], [net_out]
    if config['no_bias']:
        print "Excluding bias"
        params = [p for p in params if not p.name.endswith(".b")]
    # Per-sample Gaussian log-likelihood: an objective to be maximized.
    loss_vec = dist.gaussian.logprob(Y_gt, net_out, Y_prec)
    if config['weight_decay'] > 0.:
        print "Applying penalty on parameter norm"
        params_flat = cgt.concatenate([p.flatten() for p in params])
        loss_param = config['weight_decay'] * cgt.sum(params_flat ** 2)
        loss_vec -= loss_param  # / size_batch
    loss = cgt.sum(loss_vec) / size_batch
    # TODO_TZ f_step seems not to fail if X has wrong dim
    f_step = cgt.function(inputs, outputs)
    f_surr = get_surrogate_func(inputs + [Y_prec, Y_gt], outputs,
                                [loss_vec], params, _dbg_out=dbg_out)
    return params, f_step, None, None, None, f_surr
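# Hypothetical usage sketch (not from the original code): how the stochastic
# variant above might be driven end to end. The config keys are the ones
# make_funcs reads, but the layer sizes and toy data are invented, num_units /
# num_sto are assumed to be per-layer lists, and the keys of the dict returned
# by f_surr are inferred from the f_grad closure in the deterministic variant
# below.
def _demo_stochastic_make_funcs():
    demo_config = {
        'num_inputs': 1, 'num_outputs': 1,
        'num_units': [4], 'num_sto': [1],
        'dbg_out_full': False, 'no_bias': False, 'weight_decay': 0.,
    }
    params, f_step, _, _, _, f_surr = make_funcs(demo_config)
    X = np.random.randn(16, 1)
    Y = np.sin(X) + .1 * np.random.randn(16, 1)
    # Per-sample precision matrices; trailing dims follow the Y_prec
    # declaration above (num_inputs x num_inputs).
    Y_prec = np.repeat(np.eye(1)[None, :, :], 16, axis=0)
    y_pred, = f_step(X)          # forward pass only
    out = f_surr(X, Y_prec, Y)   # inputs ordered as [net_in, Y_prec, Y_gt]
    return out['loss'], out['surr_grad']  # assumed keys, cf. f_grad below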
def make_funcs(net_in, net_out, config, dbg_out=None):
    # get_surrogate_func below concatenates dbg_out onto the output list,
    # so the default must be an empty list, not None.
    if dbg_out is None: dbg_out = []

    def f_grad(*x):
        out = f_surr(*x)
        return out['loss'], out['surr_loss'], out['surr_grad']

    Y = cgt.matrix("Y")  # ground-truth targets
    params = nn.get_parameters(net_out)
    if 'no_bias' in config and config['no_bias']:
        print "Excluding bias"
        params = [p for p in params if not p.name.endswith(".b")]
    size_out, size_batch = Y.shape[1], net_in.shape[0]
    f_step = cgt.function([net_in], [net_out])
    # loss_raw of shape (size_batch, 1); loss should be a scalar
    # sum-of-squares loss (negated, i.e. an objective to be maximized)
    sigma = 0.1
    loss_raw = -cgt.sum((net_out - Y) ** 2, axis=1, keepdims=True) / sigma
    # negative log-likelihood
    # out_sigma = cgt.exp(net_out[:, size_out:]) + 1.e-6  # positive sigma
    # loss_raw = -gaussian_diagonal.logprob(
    #     Y, net_out,
    #     out_sigma  # alternatively: cgt.fill(.01, [size_batch, size_out])
    # )
    if 'param_penal_wt' in config:
        print "Applying penalty on parameter norm"
        assert config['param_penal_wt'] > 0
        params_flat = cgt.concatenate([p.flatten() for p in params])
        loss_param = cgt.fill(cgt.sum(params_flat ** 2), [size_batch, 1])
        loss_param *= config['param_penal_wt']
        # NOTE: added with the opposite sign to the weight-decay term in the
        # stochastic variant above.
        loss_raw += loss_param
    loss = cgt.sum(loss_raw) / size_batch
    # end of loss definition
    f_loss = cgt.function([net_in, Y], [net_out, loss])
    f_surr = get_surrogate_func([net_in, Y], [net_out] + dbg_out,
                                [loss_raw], params)
    return params, f_step, f_loss, f_grad, f_surr
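# Hypothetical usage sketch (not from the original code): driving the
# deterministic variant above with a tiny cgt.nn MLP, just to have net_in /
# net_out symbols. The layer sizes, penalty weight, and toy data are invented,
# and get_surrogate_func must still be importable from this repo.
def _demo_deterministic_make_funcs():
    net_in = cgt.matrix('X', fixed_shape=(None, 1))
    hid = nn.rectify(nn.Affine(1, 4, name='hid')(net_in))
    net_out = nn.Affine(4, 1, name='out')(hid)
    params, f_step, f_loss, f_grad, f_surr = make_funcs(
        net_in, net_out, {'param_penal_wt': 1.e-4}, dbg_out=[])
    X = np.random.randn(16, 1)
    Y = np.sin(X)
    y_pred, = f_step(X)              # forward pass
    _, cur_loss = f_loss(X, Y)       # scalar objective on the batch
    loss, surr_loss, surr_grad = f_grad(X, Y)  # per the closure above
    return loss, surr_grad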