# NOTE(review): whitespace-mangled fragment; line breaks restored below.
# The first statements sit inside the training loop body (the `break`
# requires an enclosing loop that starts before this fragment) -- confirm
# the indentation level against the full file.

    # Evaluate the current parameters on the training/validation/test splits.
    trn_loss = gather(f_loss(ps.data, trn_inputs, trn_shifts, trn_targets))
    val_loss = gather(f_loss(ps.data, val_inputs, val_shifts, val_targets))
    tst_loss = gather(f_loss(ps.data, tst_inputs, tst_shifts, tst_targets))
    # Record this iteration in the training history.
    # NOTE(review): `iter` shadows the builtin; presumably a loop counter
    # defined before this fragment -- confirm.
    his.add(iter, ps.data, trn_loss, val_loss, tst_loss)
    # Early stopping: the history object decides when to terminate.
    if his.should_terminate:
        break

# save results: restore the best parameters seen so far and plot the
# loss history to the plot directory.
ps.data[:] = his.best_pars
his.plot()
plt.savefig(plot_dir + "/loss.pdf")

# check with simple patterns: generate a tiny binary data set (3 samples)
# and print inputs/shifts/targets next to the network's outputs for a
# manual sanity check.
sim_inputs, sim_shifts, sim_targets = generate_data(cfg.x_len, cfg.s_len, 3, binary=True)
sim_results = gather(f_output(ps.data, post(sim_inputs), post(sim_shifts)))
print "input: "
print sim_inputs.T
print "shift: "
print sim_shifts.T
print "targets: "
print sim_targets.T
print "results: "
print sim_results.T

# profiler output (Theano profile-mode summary, only when profiling is enabled)
if profile:
    profmode.print_summary()
# NOTE(review): whitespace-mangled fragment; line breaks restored below.
# The first line is the tail of a truncated optimizer-constructor call
# whose opening `if cfg.optimizer == ...` branch lies before this fragment.
                 step_shrink=cfg.step_shrink, max_step=cfg.max_step)
elif cfg.optimizer == 'rmsprop':
    # RMSProp with the initial steprate from the steprate schedule.
    opt = climin.RmsProp(ps.data, f_trn_dloss, steprate=cfg.steprate[0], momentum=cfg.momentum)
elif cfg.optimizer == 'gradientdescent':
    opt = climin.GradientDescent(ps.data, f_trn_dloss, steprate=cfg.steprate[0], momentum=cfg.momentum)
else:
    assert False, "unknown optimizer"

# initialize or load state
if checkpoint is not None:
    # Resume a previous run: restore parameters, optimizer state, the
    # iteration counter, all three data splits, and the training history.
    # NOTE(review): `post` presumably moves arrays onto the compute
    # device/backend -- confirm against its definition.
    ps.data[:] = post(checkpoint['ps_data'])
    opt.state = checkpoint['opt_state']
    iteration = checkpoint['iteration']
    trn_inputs = post(checkpoint['trn_inputs'])
    trn_shifts = post(checkpoint['trn_shifts'])
    trn_targets = post(checkpoint['trn_targets'])
    val_inputs = post(checkpoint['val_inputs'])
    val_shifts = post(checkpoint['val_shifts'])
    val_targets = post(checkpoint['val_targets'])
    tst_inputs = post(checkpoint['tst_inputs'])
    tst_shifts = post(checkpoint['tst_shifts'])
    tst_targets = post(checkpoint['tst_targets'])
    his = checkpoint['his']
else:
    # Fresh run: initialize parameters.
    # NOTE(review): this branch's body continues past the end of this fragment.
    if 'continue_training' in dir(cfg) and cfg.continue_training:
# hyperparameters use_base_data = True show_gradient = False check_nans = False cfg, plot_dir = ml.common.util.standard_cfg(prepend_scriptname=False) cfg.steprate = ml.common.util.ValueIter(cfg.steprate_itr, cfg.steprate_val, transition='linear', transition_length=1000) # load base parameters lcls = {} execfile(cfg.base_dir + "/cfg.py", {}, lcls) base_x_len = lcls['x_len'] base_s_len = lcls['s_len'] bps = breze.util.ParameterSet(**FourierShiftNet.parameter_shapes(base_x_len, base_s_len)) bps.data[:] = post(np.load(cfg.base_dir + "/result.npz")['ps']) assert cfg.x_len == 2*base_x_len and cfg.s_len == 2*base_s_len, "can only double neurons" # test: #cfg.x_len = base_x_len #cfg.s_len = base_s_len # parameters ps = breze.util.ParameterSet(**FourierShiftNet.parameter_shapes(cfg.x_len, cfg.s_len)) # transfer base parameters so that nets are equivalent def doubling_matrix(n): d = [[1, 1]] nd = [d for _ in range(n)] return block_diag(*nd)
# <codecell> # load data np.random.seed(100) #RX, RZ, VX, VZ, TX, TZ = ml.common.util.load_theano_data('../datasets/boston_split.mat') RX, RZ, VX, VZ, TX, TZ = ml.common.util.load_theano_data('../datasets/abalone_split.mat') # <headingcell level=2> # Proposal: RBF on sigmoid layer # <codecell> # check kernel x=post(np.array([[11, 21, 31], [12, 22, 32]])) y=post(np.array([[101, 201], [102, 202]])) l=post(np.array([[100]])) tx = T.matrix('x') ty = T.matrix('y') tl = T.matrix('l') f_kernel_matrix = function([tx, ty, tl], StackedRBF.kernel_matrix(tx, ty, tl)) K = f_kernel_matrix(x, y, l) print gather(K) # <codecell> # hyperparameters n_targets = RZ.get_value().shape[0]
# NOTE(review): whitespace-mangled fragment; line breaks restored below.
# The leading statements are the tail of a weight-plotting function whose
# `def` (and subplots 1-3) lie before this fragment -- confirm indentation.

    # Remaining subplots of the 3x2 grid: complex weight matrices of the
    # shift net, shown as (real, imaginary) pairs.
    plt.title('Xhat_to_Yhat')
    plt.subplot(3,2,4)
    plot_complex_weights(gather(ps['Shat_to_Yhat_re']), gather(ps['Shat_to_Yhat_im']))
    plt.title('Shat_to_Yhat')
    plt.subplot(3,2,5)
    plot_complex_weights(gather(ps['x_to_xhat_re']), gather(ps['x_to_xhat_im']))
    plt.title('x_to_xhat')
    plt.subplot(3,2,6)
    plot_complex_weights(gather(ps['s_to_shat_re']), gather(ps['s_to_shat_im']))
    plt.title('s_to_shat')
    plt.tight_layout()

if __name__ == '__main__':
    # Standalone use: load a trained result and render its weight plots.

    # hyperparameters
    cfg, plot_dir = ml.common.util.standard_cfg(prepend_scriptname=False)

    # parameters: rebuild the parameter set and load the trained weights.
    ps = breze.util.ParameterSet(**FourierShiftNet.parameter_shapes(cfg.x_len, cfg.s_len))
    ps.data[:] = post(np.load(plot_dir + "/result.npz")['ps'])

    # plot
    plt.figure()
    plot_all_weights(ps)
    plt.savefig(plot_dir + "/weights.pdf")
# NOTE(review): whitespace-mangled fragment; line breaks restored below.
# The first line is the tail of a truncated optimizer-constructor call
# whose opening `if cfg.optimizer == ...` branch lies before this fragment.
                 step_shrink=cfg.step_shrink, max_step=cfg.max_step)
elif cfg.optimizer == 'rmsprop':
    # RMSProp with the initial steprate from the steprate schedule.
    opt = climin.RmsProp(ps.data, f_trn_dloss, steprate=cfg.steprate[0], momentum=cfg.momentum)
elif cfg.optimizer == 'gradientdescent':
    opt = climin.GradientDescent(ps.data, f_trn_dloss, steprate=cfg.steprate[0], momentum=cfg.momentum)
else:
    assert False, "unknown optimizer"

# initialize parameters: three mutually exclusive strategies, selected by
# optional config attributes (probed with `dir(cfg)` since they may be absent).
if 'continue_training' in dir(cfg) and cfg.continue_training:
    # Resume from previously saved weights.
    print "Loading weights..."
    ps.data[:] = post(np.load(plot_dir + "/base.npz")['ps'])
elif 'start_with_optimal_weights' in dir(cfg) and cfg.start_with_optimal_weights:
    # Analytic initialization: closed-form optimal Fourier identity weights.
    print "Initializing weights optimally..."
    res = ml.nn.id.FourierIdNet.optimal_weights(cfg.x_len)
    res = [post(x) for x in res]
    # Unpack the six (real, imaginary) weight matrices into the parameter set.
    (ps['x_to_xhat_re'], ps['x_to_xhat_im'],
     ps['Xhat_to_Yhat_re'], ps['Xhat_to_Yhat_im'],
     ps['yhat_to_y_re'], ps['yhat_to_y_im']) = res
else:
    # Random initialization, uniform in [-cfg.init/2, cfg.init/2).
    print "Initializing weights randomly..."
    ps.data[:] = cfg.init * (np.random.random(ps.data.shape) - 0.5)

# Optionally add noise to selected weight matrices.
# NOTE(review): this block's body continues past the end of this fragment.
if 'perturb_weights' in dir(cfg):
    for wname, scale in cfg.perturb_weights.iteritems():
        if scale > 0:
            print "Perturbing %s with sigma=%.3f" % (wname, scale)