cfg.steprate = ml.common.util.ValueIter(cfg.steprate_itr, cfg.steprate_val)

# parameters
ps = breze.util.ParameterSet(**FourierShiftNet.parameter_shapes(cfg.x_len, cfg.s_len))

# inputs
x = T.matrix('x')
#x.tag.test_value = np.random.random((x_len, n_samples))
s = T.matrix('s')
#s.tag.test_value = np.random.random((s_len, n_samples))
t = T.matrix('t')
#t.tag.test_value = np.random.random((x_len, n_samples))

# functions
fsn = FourierShiftNet(**ps.vars)
f_output = function(inputs=[ps.flat, x, s], outputs=fsn.output(x, s))
loss = T.mean((fsn.output(x, s) - t)**2)
if profile:
    f_loss = function(inputs=[ps.flat, x, s, t], outputs=loss, mode=profmode, name='f_loss')
    f_dloss = function(inputs=[ps.flat, x, s, t], outputs=T.grad(loss, ps.flat), mode=profmode, name='f_dloss')
else:
    f_loss = function(inputs=[ps.flat, x, s, t], outputs=loss)
    f_dloss = function(inputs=[ps.flat, x, s, t], outputs=T.grad(loss, ps.flat))

# loss and gradient on the training set as functions of the parameters only
f_trn_loss = lambda p: f_loss(p, trn_inputs, trn_shifts, trn_targets)
f_trn_dloss = lambda p: f_dloss(p, trn_inputs, trn_shifts, trn_targets)

# generate data
print "Generating data..."
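# A minimal sketch (not the repo's actual optimizer) of how the parameter-only
# closures above would drive plain gradient descent; the fixed `rate` merely
# stands in for whatever schedule cfg.steprate (a ValueIter) encodes, and
# `p0`/`n_iters` are illustrative names, not part of this script.
def sketch_gradient_descent(p0, rate=0.01, n_iters=100):
    p = p0.copy()
    for i in xrange(n_iters):
        p -= rate * f_trn_dloss(p)    # step along the negative training gradient
        print "iter %d: training loss %g" % (i, f_trn_loss(p))
    return p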
if 'mult_sparsity' in dir(cfg) and cfg.mult_sparsity > 0:
    print "Adding sparsity constraint on multiplicative weights with factor %f to loss" % cfg.mult_sparsity
    loss += cfg.mult_sparsity * (T.sum(T.abs_(ps.Xhat_to_Yhat_re) + T.abs_(ps.Xhat_to_Yhat_im)) +
                                 T.sum(T.abs_(ps.Shat_to_Yhat_re) + T.abs_(ps.Shat_to_Yhat_im)))
if 'penalize_small_yhat_to_y' in dir(cfg) and cfg.penalize_small_yhat_to_y > 0:
    print "Penalizing small yhat_to_y weights with factor %g" % cfg.penalize_small_yhat_to_y
    loss += cfg.penalize_small_yhat_to_y * T.sum(T.sqrt(T.sqr(ps.yhat_to_y_re) + T.sqr(ps.yhat_to_y_im) + 0.001)**(-4))
if 'penalize_small_x_to_xhat' in dir(cfg) and cfg.penalize_small_x_to_xhat > 0:
    print "Penalizing small x_to_xhat weights with factor %g" % cfg.penalize_small_x_to_xhat
    loss += cfg.penalize_small_x_to_xhat * T.sum(T.sqrt(T.sqr(ps.x_to_xhat_re) + T.sqr(ps.x_to_xhat_im) + 0.001)**(-4))
if 'tight_weights' in dir(cfg) and cfg.tight_weights:
    print "Tying input and output weights"
    loss += T.sum((ps.x_to_xhat_re.T - ps.yhat_to_y_re)**2 + (ps.x_to_xhat_im.T + ps.yhat_to_y_im)**2)

# Theano functions
f_output = function(inputs=[ps.flat, x, s], outputs=out_re)
f_pure_loss = function(inputs=[ps.flat, x, s, t], outputs=pure_loss)
f_binary_loss = function(inputs=[ps.flat, x, s, t], outputs=binary_loss)
f_loss = function(inputs=[ps.flat, x, s, t], outputs=loss)
f_dloss = function(inputs=[ps.flat, x, s, t], outputs=T.grad(loss, ps.flat))

# separate gradients wrt layer weights
if show_gradient:
    f_grads = {}
    for wname, wvar in ps.vars.iteritems():
        f_grads[wname] = function(inputs=[ps.flat, x, s, t], outputs=T.grad(loss, wvar))

if do_weight_plots:
    plt.figure()

# optimizer
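# A quick numeric look (values arbitrary) at the small-weight penalty used
# above: sqrt(|w|**2 + 0.001)**(-4) grows rapidly as a weight's magnitude
# approaches zero (the 0.001 offset caps it at 1e6), so it pushes weights
# away from 0 rather than towards it.
for mag in [1.0, 0.5, 0.1, 0.01, 0.0]:
    print "|w| = %4.2f  penalty = %g" % (mag, np.sqrt(mag**2 + 0.001)**(-4))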
def generate_base_data(n_samples):
    inputs, shifts, targets = generate_data(base_x_len, base_s_len, n_samples)
    inputs = gp.dot(doubling_matrix(base_x_len).T, inputs)
    targets = gp.dot(doubling_matrix(base_x_len).T, targets)
    shifts = gp.dot(shift_doubling_matrix(base_s_len).T, shifts)
    return inputs, shifts, targets

# inputs
x = T.matrix('x')
s = T.matrix('s')
t = T.matrix('t')

# functions
fsn = FourierShiftNet(**ps.vars)
f_output = function(inputs=[ps.flat, x, s], outputs=fsn.output(x, s))
loss = T.mean((fsn.output(x, s) - t)**2)
f_loss = function(inputs=[ps.flat, x, s, t], outputs=loss)
f_dloss = function(inputs=[ps.flat, x, s, t], outputs=T.grad(loss, ps.flat))

# separate gradients wrt layer weights
if show_gradient:
    f_grads = {}
    for wname, wvar in ps.vars.iteritems():
        f_grads[wname] = function(inputs=[ps.flat, x, s, t], outputs=T.grad(loss, wvar))

print "Generating validation data..."
val_inputs, val_shifts, val_targets = generate_data(cfg.n_val_samples)
tst_inputs, tst_shifts, tst_targets = generate_data(cfg.n_val_samples)
print "Done."
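# Hypothetical helper (not part of the original script) showing how the
# per-weight gradient functions in f_grads might be used: report each layer's
# gradient norm on the validation set. Assumes show_gradient was set so that
# f_grads exists, and that the compiled functions return plain NumPy arrays.
def print_gradient_norms(p):
    for wname in sorted(f_grads):
        g = f_grads[wname](p, val_inputs, val_shifts, val_targets)
        print "gradient norm of %-20s: %g" % (wname, np.sqrt(np.sum(g**2)))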
# <headingcell level=2>

# Proposal: RBF on sigmoid layer

# <codecell>

# check kernel
x = post(np.array([[11, 21, 31],
                   [12, 22, 32]]))
y = post(np.array([[101, 201],
                   [102, 202]]))
l = post(np.array([[100]]))

tx = T.matrix('x')
ty = T.matrix('y')
tl = T.matrix('l')
f_kernel_matrix = function([tx, ty, tl], StackedRBF.kernel_matrix(tx, ty, tl))
K = f_kernel_matrix(x, y, l)
print gather(K)

# <codecell>

# hyperparameters
n_targets = RZ.get_value().shape[0]
n_features = RX.get_value().shape[0]
n_samples = RX.get_value().shape[1]
n_hidden = 50
#n_pivots = int(n_samples / 2)
n_pivots = 200

# <codecell>
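# A plain-NumPy reference (not part of the original notebook) for the kernel
# check above, assuming StackedRBF.kernel_matrix computes a squared-exponential
# kernel with lengthscale l over columns-as-samples; its exact definition is
# not shown here, so treat this only as a plausible cross-check. Expects
# ordinary NumPy arrays rather than garrays.
def rbf_kernel_matrix_ref(x, y, l):
    # squared Euclidean distances between every column of x and every column of y
    sq_dists = ((x[:, :, np.newaxis] - y[:, np.newaxis, :])**2).sum(axis=0)
    return np.exp(-sq_dists / (2.0 * l**2))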
if 'mult_sparsity' in dir(cfg) and cfg.mult_sparsity > 0:
    print "Adding sparsity constraint on multiplicative weights with factor %f to loss" % cfg.mult_sparsity
    loss += cfg.mult_sparsity * (T.sum(T.abs_(ps.Xhat_to_Yhat_re) + T.abs_(ps.Xhat_to_Yhat_im)) +
                                 T.sum(T.abs_(ps.Shat_to_Yhat_re) + T.abs_(ps.Shat_to_Yhat_im)))
if 'penalize_small_yhat_to_y' in dir(cfg) and cfg.penalize_small_yhat_to_y > 0:
    print "Penalizing small yhat_to_y weights with factor %g" % cfg.penalize_small_yhat_to_y
    loss += cfg.penalize_small_yhat_to_y * T.sum(T.sqrt(T.sqr(ps.yhat_to_y_re) + T.sqr(ps.yhat_to_y_im) + 0.001)**(-4))
if 'penalize_small_x_to_xhat' in dir(cfg) and cfg.penalize_small_x_to_xhat > 0:
    print "Penalizing small x_to_xhat weights with factor %g" % cfg.penalize_small_x_to_xhat
    loss += cfg.penalize_small_x_to_xhat * T.sum(T.sqrt(T.sqr(ps.x_to_xhat_re) + T.sqr(ps.x_to_xhat_im) + 0.001)**(-4))
if 'tied_weights' in dir(cfg) and cfg.tied_weights:
    print "Tying input and output weights"
    loss += T.sum((ps.x_to_xhat_re.T - ps.yhat_to_y_re)**2 + (ps.x_to_xhat_im.T + ps.yhat_to_y_im)**2)

# Theano functions
f_output = function(inputs=[ps.flat, x, s], outputs=out_re, on_unused_input='warn')
f_pure_loss = function(inputs=[ps.flat, x, s, t], outputs=pure_loss, on_unused_input='warn')
f_binary_loss = function(inputs=[ps.flat, x, s, t], outputs=binary_loss, on_unused_input='warn')
f_loss = function(inputs=[ps.flat, x, s, t], outputs=loss, on_unused_input='warn')
f_dloss = function(inputs=[ps.flat, x, s, t], outputs=T.grad(loss, ps.flat), on_unused_input='warn')

# separate gradients wrt layer weights
if show_gradient:
    f_grads = {}
    for wname, wvar in ps.vars.iteritems():
        f_grads[wname] = function(inputs=[ps.flat, x, s, t], outputs=T.grad(loss, wvar))

if do_weight_plots:
    plt.figure()

# optimizer
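# The tied-weights penalty above vanishes exactly when the output weights are
# the conjugate (Hermitian) transpose of the input weights: real parts equal
# under transposition, imaginary parts negated. A quick NumPy check with
# made-up 4x4 weights (shapes and values are illustrative only):
_re = np.random.randn(4, 4)
_im = np.random.randn(4, 4)
_yhat_to_y_re = _re.T      # tied: transposed real part
_yhat_to_y_im = -_im.T     # tied: negated transposed imaginary part
print np.sum((_re.T - _yhat_to_y_re)**2 + (_im.T + _yhat_to_y_im)**2)   # 0.0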
#np.random.seed(100)
#RX, RZ, VX, VZ, TX, TZ = ml.common.util.load_theano_data('../datasets/boston_split.mat')
#RX, RZ, VX, VZ, TX, TZ = ml.common.util.load_theano_data('../datasets/abalone_split.mat')

# check kernel
x = floatx(np.array([[11, 21, 31],
                     [12, 22, 32]]))
y = floatx(np.array([[101, 201],
                     [102, 202]]))
x = gp.as_garray(x)
y = gp.as_garray(y)
l = gp.as_garray([[100]])

tx = T.matrix('x')
ty = T.matrix('y')
tl = T.matrix('l')
f_kernel_matrix = function([tx, ty, tl], StackedRBF.kernel_matrix(tx, ty, tl))
K = f_kernel_matrix(x, y, l)
print K

tsq = T.sum(tx**2)
f_sq = function([tx], tsq)
print f_sq(gp.as_garray([[5]]))

gsq = T.grad(tsq, [tx])
f_gsq = function([tx], gsq)
print f_gsq(gp.as_garray([[5]]))
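# A finite-difference cross-check (not in the original) of the analytic
# gradient printed above: d/dx sum(x**2) = 2*x, so at x = 5 both the numeric
# estimate and f_gsq should give about 10; eps is an arbitrary choice.
eps = 1e-4
fd = (f_sq(gp.as_garray([[5.0 + eps]])) - f_sq(gp.as_garray([[5.0 - eps]]))) / (2 * eps)
print fd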