def main(args):
    trial = int(args['trial'])
    pkl_name = 'vrnn_gmm_%d' % trial
    channel_name = 'valid_nll_upper_bound'

    data_path = args['data_path']
    save_path = args['save_path']

    monitoring_freq = int(args['monitoring_freq'])
    force_saving_freq = int(args['force_saving_freq'])
    reset_freq = int(args['reset_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    m_batch_size = int(args['m_batch_size'])
    x_dim = int(args['x_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])
    lr = float(args['lr'])
    debug = int(args['debug'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 500
    p_z_dim = 500
    p_x_dim = 500
    x2s_dim = 500
    z2s_dim = 500
    target_dim = x_dim * k

    file_name = 'blizzard_unseg_tbptt'
    normal_params = np.load(data_path + file_name + '_normal.npz')
    X_mean = normal_params['X_mean']
    X_std = normal_params['X_std']

    model = Model()
    train_data = Blizzard_tbptt(name='train', path=data_path, frame_size=x_dim, file_name=file_name, X_mean=X_mean, X_std=X_std)
    valid_data = Blizzard_tbptt(name='valid', path=data_path, frame_size=x_dim, file_name=file_name, X_mean=X_mean, X_std=X_std)

    x = train_data.theano_vars()
    m_x = valid_data.theano_vars()

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=theano.config.floatX)
        m_x.tag.test_value = np.zeros((15, m_batch_size, x_dim), dtype=theano.config.floatX)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    # Feature extractor x_t -> x_4 (four ReLU layers)
    x_1 = FullyConnectedLayer(name='x_1', parent=['x_t'], parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b)
    x_2 = FullyConnectedLayer(name='x_2', parent=['x_1'], parent_dim=[x2s_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b)
    x_3 = FullyConnectedLayer(name='x_3', parent=['x_2'], parent_dim=[x2s_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b)
    x_4 = FullyConnectedLayer(name='x_4', parent=['x_3'], parent_dim=[x2s_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b)

    # Latent feature extractor z_t -> z_4
    z_1 = FullyConnectedLayer(name='z_1', parent=['z_t'], parent_dim=[z_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b)
    z_2 = FullyConnectedLayer(name='z_2', parent=['z_1'], parent_dim=[z2s_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b)
    z_3 = FullyConnectedLayer(name='z_3', parent=['z_2'], parent_dim=[z2s_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b)
    z_4 = FullyConnectedLayer(name='z_4', parent=['z_3'], parent_dim=[z2s_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b)

    rnn = LSTM(name='rnn', parent=['x_4', 'z_4'], parent_dim=[x2s_dim, z2s_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b)

    # Inference network q(z_t | x_t, s_tm1)
    phi_1 = FullyConnectedLayer(name='phi_1', parent=['x_4', 's_tm1'], parent_dim=[x2s_dim, rnn_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b)
    phi_2 = FullyConnectedLayer(name='phi_2', parent=['phi_1'], parent_dim=[q_z_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b)
    phi_3 = FullyConnectedLayer(name='phi_3', parent=['phi_2'], parent_dim=[q_z_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b)
    phi_4 = FullyConnectedLayer(name='phi_4', parent=['phi_3'], parent_dim=[q_z_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b)
    phi_mu = FullyConnectedLayer(name='phi_mu', parent=['phi_4'], parent_dim=[q_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b)
    phi_sig = FullyConnectedLayer(name='phi_sig', parent=['phi_4'], parent_dim=[q_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig)

    # Prior network p(z_t | s_tm1)
    prior_1 = FullyConnectedLayer(name='prior_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b)
    prior_2 = FullyConnectedLayer(name='prior_2', parent=['prior_1'], parent_dim=[p_z_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b)
    prior_3 = FullyConnectedLayer(name='prior_3', parent=['prior_2'], parent_dim=[p_z_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b)
    prior_4 = FullyConnectedLayer(name='prior_4', parent=['prior_3'], parent_dim=[p_z_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b)
    prior_mu = FullyConnectedLayer(name='prior_mu', parent=['prior_4'], parent_dim=[p_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b)
    prior_sig = FullyConnectedLayer(name='prior_sig', parent=['prior_4'], parent_dim=[p_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig)

    # Decoder p(x_t | z_t, s_tm1): diagonal GMM parameters
    theta_1 = FullyConnectedLayer(name='theta_1', parent=['z_4', 's_tm1'], parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b)
    theta_2 = FullyConnectedLayer(name='theta_2', parent=['theta_1'], parent_dim=[p_x_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b)
    theta_3 = FullyConnectedLayer(name='theta_3', parent=['theta_2'], parent_dim=[p_x_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b)
    theta_4 = FullyConnectedLayer(name='theta_4', parent=['theta_3'], parent_dim=[p_x_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b)
    theta_mu = FullyConnectedLayer(name='theta_mu', parent=['theta_4'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b)
    theta_sig = FullyConnectedLayer(name='theta_sig', parent=['theta_4'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig)
    coeff = FullyConnectedLayer(name='coeff', parent=['theta_4'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b)

    nodes = [rnn,
             x_1, x_2, x_3, x_4,
             z_1, z_2, z_3, z_4,
             phi_1, phi_2, phi_3, phi_4, phi_mu, phi_sig,
             prior_1, prior_2, prior_3, prior_4, prior_mu, prior_sig,
             theta_1, theta_2, theta_3, theta_4, theta_mu, theta_sig, coeff]

    params = OrderedDict()
    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())
    params = init_tparams(params)

    # TBPTT bookkeeping: carry the last LSTM state across windows and reset
    # it every reset_freq updates.
    step_count = sharedX(0, name='step_count')
    last_rnn = np.zeros((batch_size, rnn_dim * 2), dtype=theano.config.floatX)
    rnn_tm1 = sharedX(last_rnn, name='rnn_tm1')
    shared_updates = OrderedDict()
    shared_updates[step_count] = step_count + 1

    s_0 = T.switch(T.eq(T.mod(step_count, reset_freq), 0),
                   rnn.get_init_state(batch_size), rnn_tm1)

    x_shape = x.shape
    x_in = x.reshape((x_shape[0] * x_shape[1], -1))
    x_1_in = x_1.fprop([x_in], params)
    x_2_in = x_2.fprop([x_1_in], params)
    x_3_in = x_3.fprop([x_2_in], params)
    x_4_in = x_4.fprop([x_3_in], params)
    x_4_in = x_4_in.reshape((x_shape[0], x_shape[1], -1))

    def inner_fn(x_t, s_tm1):
        phi_1_t = phi_1.fprop([x_t, s_tm1], params)
        phi_2_t = phi_2.fprop([phi_1_t], params)
        phi_3_t = phi_3.fprop([phi_2_t], params)
        phi_4_t = phi_4.fprop([phi_3_t], params)
        phi_mu_t = phi_mu.fprop([phi_4_t], params)
        phi_sig_t = phi_sig.fprop([phi_4_t], params)

        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_2_t = prior_2.fprop([prior_1_t], params)
        prior_3_t = prior_3.fprop([prior_2_t], params)
        prior_4_t = prior_4.fprop([prior_3_t], params)
        prior_mu_t = prior_mu.fprop([prior_4_t], params)
        prior_sig_t = prior_sig.fprop([prior_4_t], params)

        z_t = Gaussian_sample(phi_mu_t, phi_sig_t)
        z_1_t = z_1.fprop([z_t], params)
        z_2_t = z_2.fprop([z_1_t], params)
        z_3_t = z_3.fprop([z_2_t], params)
        z_4_t = z_4.fprop([z_3_t], params)

        s_t = rnn.fprop([[x_t, z_4_t], [s_tm1]], params)

        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_4_t

    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp, z_4_temp), updates) =\
        theano.scan(fn=inner_fn,
                    sequences=[x_4_in],
                    outputs_info=[s_0, None, None, None, None, None])

    # 'key'/'value' rather than 'k'/'v' so the mixture count k is not shadowed.
    for key, value in updates.iteritems():
        key.default_update = value

    shared_updates[rnn_tm1] = s_temp[-1]
    # Prepend s_0 so s_temp[t] is the state *before* emitting step t.
    s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)

    theta_1_temp = theta_1.fprop([z_4_temp, s_temp], params)
    theta_2_temp = theta_2.fprop([theta_1_temp], params)
    theta_3_temp = theta_3.fprop([theta_2_temp], params)
    theta_4_temp = theta_4.fprop([theta_3_temp], params)
    theta_mu_temp = theta_mu.fprop([theta_4_temp], params)
    theta_sig_temp = theta_sig.fprop([theta_4_temp], params)
    coeff_temp = coeff.fprop([theta_4_temp], params)

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp)

    x_shape = x.shape
    x_in = x.reshape((x_shape[0] * x_shape[1], -1))
    theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff_in = coeff_temp.reshape((x_shape[0] * x_shape[1], -1))

    recon = GMM(x_in, theta_mu_in, theta_sig_in, coeff_in)
    recon_term = recon.mean()
    kl_term = kl_temp.mean()
    nll_upper_bound = recon_term + kl_term
    nll_upper_bound.name = 'nll_upper_bound'

    # Monitoring graph on a separate (m_) batch
    m_x_1_temp = x_1.fprop([m_x], params)
    m_x_2_temp = x_2.fprop([m_x_1_temp], params)
    m_x_3_temp = x_3.fprop([m_x_2_temp], params)
    m_x_4_temp = x_4.fprop([m_x_3_temp], params)

    m_s_0 = rnn.get_init_state(m_batch_size)

    ((m_s_temp, m_phi_mu_temp, m_phi_sig_temp, m_prior_mu_temp, m_prior_sig_temp, m_z_4_temp), m_updates) =\
        theano.scan(fn=inner_fn,
                    sequences=[m_x_4_temp],
                    outputs_info=[m_s_0, None, None, None, None, None])

    for key, value in m_updates.iteritems():
        key.default_update = value

    m_s_temp = concatenate([m_s_0[None, :, :], m_s_temp[:-1]], axis=0)
    m_theta_1_temp = theta_1.fprop([m_z_4_temp, m_s_temp], params)
    m_theta_2_temp = theta_2.fprop([m_theta_1_temp], params)
    m_theta_3_temp = theta_3.fprop([m_theta_2_temp], params)
    m_theta_4_temp = theta_4.fprop([m_theta_3_temp], params)
    m_theta_mu_temp = theta_mu.fprop([m_theta_4_temp], params)
    m_theta_sig_temp = theta_sig.fprop([m_theta_4_temp], params)
    m_coeff_temp = coeff.fprop([m_theta_4_temp], params)

    m_kl_temp = KLGaussianGaussian(m_phi_mu_temp, m_phi_sig_temp, m_prior_mu_temp, m_prior_sig_temp)

    m_x_shape = m_x.shape
    m_x_in = m_x.reshape((m_x_shape[0] * m_x_shape[1], -1))
    m_theta_mu_in = m_theta_mu_temp.reshape((m_x_shape[0] * m_x_shape[1], -1))
    m_theta_sig_in = m_theta_sig_temp.reshape((m_x_shape[0] * m_x_shape[1], -1))
    m_coeff_in = m_coeff_temp.reshape((m_x_shape[0] * m_x_shape[1], -1))

    m_recon = GMM(m_x_in, m_theta_mu_in, m_theta_sig_in, m_coeff_in)
    m_recon_term = m_recon.mean()
    m_kl_term = m_kl_temp.mean()
    m_nll_upper_bound = m_recon_term + m_kl_term
    m_nll_upper_bound.name = 'nll_upper_bound'
    m_recon_term.name = 'recon_term'
    m_kl_term.name = 'kl_term'

    # Summary statistics for monitoring
    max_x = m_x.max()
    mean_x = m_x.mean()
    min_x = m_x.min()
    max_x.name = 'max_x'
    mean_x.name = 'mean_x'
    min_x.name = 'min_x'

    max_theta_mu = m_theta_mu_in.max()
    mean_theta_mu = m_theta_mu_in.mean()
    min_theta_mu = m_theta_mu_in.min()
    max_theta_mu.name = 'max_theta_mu'
    mean_theta_mu.name = 'mean_theta_mu'
    min_theta_mu.name = 'min_theta_mu'

    max_theta_sig = m_theta_sig_in.max()
    mean_theta_sig = m_theta_sig_in.mean()
    min_theta_sig = m_theta_sig_in.min()
    max_theta_sig.name = 'max_theta_sig'
    mean_theta_sig.name = 'mean_theta_sig'
    min_theta_sig.name = 'min_theta_sig'

    max_phi_sig = m_phi_sig_temp.max()
    mean_phi_sig = m_phi_sig_temp.mean()
    min_phi_sig = m_phi_sig_temp.min()
    max_phi_sig.name = 'max_phi_sig'
    mean_phi_sig.name = 'mean_phi_sig'
    min_phi_sig.name = 'min_phi_sig'

    max_prior_sig = m_prior_sig_temp.max()
    mean_prior_sig = m_prior_sig_temp.mean()
    min_prior_sig = m_prior_sig_temp.min()
    max_prior_sig.name = 'max_prior_sig'
    mean_prior_sig.name = 'mean_prior_sig'
    min_prior_sig.name = 'min_prior_sig'

    model.inputs = [x]
    model.params = params
    model.nodes = nodes
    model.set_updates(shared_updates)

    optimizer = Adam(lr=lr)

    monitor_fn = theano.function(inputs=[m_x],
                                 outputs=[m_nll_upper_bound, m_recon_term, m_kl_term,
                                          max_phi_sig, mean_phi_sig, min_phi_sig,
                                          max_prior_sig, mean_prior_sig, min_prior_sig,
                                          max_theta_sig, mean_theta_sig, min_theta_sig,
                                          max_x, mean_x, min_x,
                                          max_theta_mu, mean_theta_mu, min_theta_mu],
                                 on_unused_input='ignore')

    extension = [
        GradientClipping(batch_size=batch_size, check_nan=1),
        EpochCount(epoch),
        Monitoring(freq=monitoring_freq,
                   monitor_fn=monitor_fn,
                   ddout=[m_nll_upper_bound, m_recon_term, m_kl_term,
                          max_phi_sig, mean_phi_sig, min_phi_sig,
                          max_prior_sig, mean_prior_sig, min_prior_sig,
                          max_theta_sig, mean_theta_sig, min_theta_sig,
                          max_x, mean_x, min_x,
                          max_theta_mu, mean_theta_mu, min_theta_mu],
                   data=[Iterator(train_data, m_batch_size, start=0, end=112640),
                         Iterator(valid_data, m_batch_size, start=2040064, end=2152704)]),
        Picklize(freq=monitoring_freq, force_save_freq=force_saving_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq, force_save_freq=force_saving_freq, path=save_path, channel=channel_name),
        WeightNorm()
    ]

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size, start=0, end=2040064),
                        model=model,
                        optimizer=optimizer,
                        cost=nll_upper_bound,
                        outputs=[nll_upper_bound],
                        extension=extension)
    mainloop.run()
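# ---------------------------------------------------------------------------
# For reference: the cle helpers used throughout these scripts
# (Gaussian_sample, KLGaussianGaussian) implement standard VAE building
# blocks. Below is a minimal numpy sketch of what they presumably compute;
# the names and shape conventions are illustrative assumptions, not the
# library's actual API.
# ---------------------------------------------------------------------------
import numpy as np

def gaussian_sample_sketch(mu, sig, rng=np.random):
    # Reparameterization trick: z = mu + sig * eps with eps ~ N(0, I),
    # so gradients can flow through mu and sig.
    eps = rng.standard_normal(mu.shape)
    return mu + sig * eps

def kl_gaussian_gaussian_sketch(mu_q, sig_q, mu_p, sig_p):
    # Closed-form KL(q || p) between diagonal Gaussians, summed over the
    # latent dimension (last axis).
    kl = (np.log(sig_p) - np.log(sig_q)
          + (sig_q ** 2 + (mu_q - mu_p) ** 2) / (2.0 * sig_p ** 2) - 0.5)
    return kl.sum(axis=-1)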
def main(args):
    trial = int(args['trial'])
    pkl_name = 'vrnn_gauss_%d' % trial
    channel_name = 'valid_nll_upper_bound'

    data_path = args['data_path']
    save_path = args['save_path']

    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    lr = float(args['lr'])
    debug = int(args['debug'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 150
    p_z_dim = 150
    p_x_dim = 250
    x2s_dim = 250
    z2s_dim = 150
    target_dim = x_dim - 1

    model = Model()
    train_data = IAMOnDB(name='train', prep='normalize', cond=False, path=data_path)
    X_mean = train_data.X_mean
    X_std = train_data.X_std
    valid_data = IAMOnDB(name='valid', prep='normalize', cond=False, path=data_path, X_mean=X_mean, X_std=X_std)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask = train_data.theano_vars()

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1', parent=['x_t'], parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b)
    z_1 = FullyConnectedLayer(name='z_1', parent=['z_t'], parent_dim=[z_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b)
    rnn = LSTM(name='rnn', parent=['x_1', 'z_1'], parent_dim=[x2s_dim, z2s_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b)
    phi_1 = FullyConnectedLayer(name='phi_1', parent=['x_1', 's_tm1'], parent_dim=[x2s_dim, rnn_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b)
    phi_mu = FullyConnectedLayer(name='phi_mu', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b)
    phi_sig = FullyConnectedLayer(name='phi_sig', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig)
    prior_1 = FullyConnectedLayer(name='prior_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b)
    prior_mu = FullyConnectedLayer(name='prior_mu', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b)
    prior_sig = FullyConnectedLayer(name='prior_sig', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig)
    theta_1 = FullyConnectedLayer(name='theta_1', parent=['z_1', 's_tm1'], parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b)
    theta_mu = FullyConnectedLayer(name='theta_mu', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b)
    theta_sig = FullyConnectedLayer(name='theta_sig', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig)
    corr = FullyConnectedLayer(name='corr', parent=['theta_1'], parent_dim=[p_x_dim], nout=1, unit='tanh', init_W=init_W, init_b=init_b)
    binary = FullyConnectedLayer(name='binary', parent=['theta_1'], parent_dim=[p_x_dim], nout=1, unit='sigmoid', init_W=init_W, init_b=init_b)

    nodes = [rnn,
             x_1, z_1,
             phi_1, phi_mu, phi_sig,
             prior_1, prior_mu, prior_sig,
             theta_1, theta_mu, theta_sig, corr, binary]

    params = OrderedDict()
    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())
    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)
    x_1_temp = x_1.fprop([x], params)

    def inner_fn(x_t, s_tm1):
        phi_1_t = phi_1.fprop([x_t, s_tm1], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(phi_mu_t, phi_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        s_t = rnn.fprop([[x_t, z_1_t], [s_tm1]], params)

        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_1_t

    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp, z_1_temp), updates) =\
        theano.scan(fn=inner_fn,
                    sequences=[x_1_temp],
                    outputs_info=[s_0, None, None, None, None, None])

    for key, value in updates.iteritems():
        key.default_update = value

    s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)
    theta_1_temp = theta_1.fprop([z_1_temp, s_temp], params)
    theta_mu_temp = theta_mu.fprop([theta_1_temp], params)
    theta_sig_temp = theta_sig.fprop([theta_1_temp], params)
    corr_temp = corr.fprop([theta_1_temp], params)
    binary_temp = binary.fprop([theta_1_temp], params)

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp)

    x_shape = x.shape
    x_in = x.reshape((x_shape[0] * x_shape[1], -1))
    theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1))
    corr_in = corr_temp.reshape((x_shape[0] * x_shape[1], -1))
    binary_in = binary_temp.reshape((x_shape[0] * x_shape[1], -1))

    recon = BiGauss(x_in, theta_mu_in, theta_sig_in, corr_in, binary_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon = recon * mask
    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    kl_temp = kl_temp * mask
    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    nll_upper_bound = recon_term + kl_term
    nll_upper_bound.name = 'nll_upper_bound'

    max_x = x.max()
    mean_x = x.mean()
    min_x = x.min()
    max_x.name = 'max_x'
    mean_x.name = 'mean_x'
    min_x.name = 'min_x'

    max_theta_mu = theta_mu_in.max()
    mean_theta_mu = theta_mu_in.mean()
    min_theta_mu = theta_mu_in.min()
    max_theta_mu.name = 'max_theta_mu'
    mean_theta_mu.name = 'mean_theta_mu'
    min_theta_mu.name = 'min_theta_mu'

    max_theta_sig = theta_sig_in.max()
    mean_theta_sig = theta_sig_in.mean()
    min_theta_sig = theta_sig_in.min()
    max_theta_sig.name = 'max_theta_sig'
    mean_theta_sig.name = 'mean_theta_sig'
    min_theta_sig.name = 'min_theta_sig'

    max_phi_sig = phi_sig_temp.max()
    mean_phi_sig = phi_sig_temp.mean()
    min_phi_sig = phi_sig_temp.min()
    max_phi_sig.name = 'max_phi_sig'
    mean_phi_sig.name = 'mean_phi_sig'
    min_phi_sig.name = 'min_phi_sig'

    max_prior_sig = prior_sig_temp.max()
    mean_prior_sig = prior_sig_temp.mean()
    min_prior_sig = prior_sig_temp.min()
    max_prior_sig.name = 'max_prior_sig'
    mean_prior_sig.name = 'mean_prior_sig'
    min_prior_sig.name = 'min_prior_sig'

    model.inputs = [x, mask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch),
        Monitoring(freq=monitoring_freq,
                   ddout=[nll_upper_bound, recon_term, kl_term,
                          max_phi_sig, mean_phi_sig, min_phi_sig,
                          max_prior_sig, mean_prior_sig, min_prior_sig,
                          max_theta_sig, mean_theta_sig, min_theta_sig,
                          max_x, mean_x, min_x,
                          max_theta_mu, mean_theta_mu, min_theta_mu],
                   data=[Iterator(valid_data, batch_size)]),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq, path=save_path, channel=channel_name),
        WeightNorm()
    ]

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=nll_upper_bound,
                        outputs=[nll_upper_bound],
                        extension=extension)
    mainloop.run()
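# ---------------------------------------------------------------------------
# All of these scripts expect a flat dict of string-valued hyperparameters.
# A hypothetical invocation of the IAM-OnDB script above; every value below
# is an illustrative assumption, not a setting taken from the experiments.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    args = {'trial': '1',
            'data_path': '/path/to/iamondb/',
            'save_path': '/path/to/save/',
            'monitoring_freq': '1000',
            'epoch': '200',
            'batch_size': '64',
            'x_dim': '3',      # e.g. (dx, dy, pen-up)
            'z_dim': '50',
            'rnn_dim': '1200',
            'lr': '0.001',
            'debug': '0'}
    main(args)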
def main(args):
    trial = int(args['trial'])
    pkl_name = 'rnn_gauss_%d' % trial
    channel_name = 'valid_nll'

    data_path = args['data_path']
    save_path = args['save_path']

    monitoring_freq = int(args['monitoring_freq'])
    force_saving_freq = int(args['force_saving_freq'])
    reset_freq = int(args['reset_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    m_batch_size = int(args['m_batch_size'])
    x_dim = int(args['x_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    lr = float(args['lr'])
    debug = int(args['debug'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    x2s_dim = 800
    s2x_dim = 800
    target_dim = x_dim

    file_name = 'blizzard_unseg_tbptt'
    normal_params = np.load(data_path + file_name + '_normal.npz')
    X_mean = normal_params['X_mean']
    X_std = normal_params['X_std']

    model = Model()
    train_data = Blizzard_tbptt(name='train', path=data_path, frame_size=x_dim, file_name=file_name, X_mean=X_mean, X_std=X_std)
    valid_data = Blizzard_tbptt(name='valid', path=data_path, frame_size=x_dim, file_name=file_name, X_mean=X_mean, X_std=X_std)

    x = train_data.theano_vars()
    m_x = valid_data.theano_vars()

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=theano.config.floatX)
        m_x.tag.test_value = np.zeros((15, m_batch_size, x_dim), dtype=theano.config.floatX)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x_1 = FullyConnectedLayer(name='x_1', parent=['x_t'], parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b)
    x_2 = FullyConnectedLayer(name='x_2', parent=['x_1'], parent_dim=[x2s_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b)
    x_3 = FullyConnectedLayer(name='x_3', parent=['x_2'], parent_dim=[x2s_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b)
    x_4 = FullyConnectedLayer(name='x_4', parent=['x_3'], parent_dim=[x2s_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b)
    rnn = LSTM(name='rnn', parent=['x_4'], parent_dim=[x2s_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b)
    theta_1 = FullyConnectedLayer(name='theta_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=s2x_dim, unit='relu', init_W=init_W, init_b=init_b)
    theta_2 = FullyConnectedLayer(name='theta_2', parent=['theta_1'], parent_dim=[s2x_dim], nout=s2x_dim, unit='relu', init_W=init_W, init_b=init_b)
    theta_3 = FullyConnectedLayer(name='theta_3', parent=['theta_2'], parent_dim=[s2x_dim], nout=s2x_dim, unit='relu', init_W=init_W, init_b=init_b)
    theta_4 = FullyConnectedLayer(name='theta_4', parent=['theta_3'], parent_dim=[s2x_dim], nout=s2x_dim, unit='relu', init_W=init_W, init_b=init_b)
    theta_mu = FullyConnectedLayer(name='theta_mu', parent=['theta_4'], parent_dim=[s2x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b)
    theta_sig = FullyConnectedLayer(name='theta_sig', parent=['theta_4'], parent_dim=[s2x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig)

    nodes = [rnn,
             x_1, x_2, x_3, x_4,
             theta_1, theta_2, theta_3, theta_4, theta_mu, theta_sig]

    params = OrderedDict()
    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())
    params = init_tparams(params)

    # TBPTT bookkeeping: carry the last LSTM state across windows and reset
    # it every reset_freq updates.
    step_count = sharedX(0, name='step_count')
    last_rnn = np.zeros((batch_size, rnn_dim * 2), dtype=theano.config.floatX)
    rnn_tm1 = sharedX(last_rnn, name='rnn_tm1')
    shared_updates = OrderedDict()
    shared_updates[step_count] = step_count + 1

    s_0 = T.switch(T.eq(T.mod(step_count, reset_freq), 0),
                   rnn.get_init_state(batch_size), rnn_tm1)

    x_1_temp = x_1.fprop([x], params)
    x_2_temp = x_2.fprop([x_1_temp], params)
    x_3_temp = x_3.fprop([x_2_temp], params)
    x_4_temp = x_4.fprop([x_3_temp], params)

    def inner_fn(x_t, s_tm1):
        s_t = rnn.fprop([[x_t], [s_tm1]], params)
        return s_t

    (s_temp, updates) = theano.scan(fn=inner_fn, sequences=[x_4_temp], outputs_info=[s_0])

    for key, value in updates.iteritems():
        key.default_update = value

    shared_updates[rnn_tm1] = s_temp[-1]
    s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)
    theta_1_temp = theta_1.fprop([s_temp], params)
    theta_2_temp = theta_2.fprop([theta_1_temp], params)
    theta_3_temp = theta_3.fprop([theta_2_temp], params)
    theta_4_temp = theta_4.fprop([theta_3_temp], params)
    theta_mu_temp = theta_mu.fprop([theta_4_temp], params)
    theta_sig_temp = theta_sig.fprop([theta_4_temp], params)

    recon = Gaussian(x, theta_mu_temp, theta_sig_temp)
    recon_term = recon.mean()
    recon_term.name = 'nll'

    # Monitoring graph on a separate (m_) batch
    m_x_1_temp = x_1.fprop([m_x], params)
    m_x_2_temp = x_2.fprop([m_x_1_temp], params)
    m_x_3_temp = x_3.fprop([m_x_2_temp], params)
    m_x_4_temp = x_4.fprop([m_x_3_temp], params)

    m_s_0 = rnn.get_init_state(m_batch_size)

    (m_s_temp, m_updates) = theano.scan(fn=inner_fn, sequences=[m_x_4_temp], outputs_info=[m_s_0])

    for key, value in m_updates.iteritems():
        key.default_update = value

    m_s_temp = concatenate([m_s_0[None, :, :], m_s_temp[:-1]], axis=0)
    m_theta_1_temp = theta_1.fprop([m_s_temp], params)
    m_theta_2_temp = theta_2.fprop([m_theta_1_temp], params)
    m_theta_3_temp = theta_3.fprop([m_theta_2_temp], params)
    m_theta_4_temp = theta_4.fprop([m_theta_3_temp], params)
    m_theta_mu_temp = theta_mu.fprop([m_theta_4_temp], params)
    m_theta_sig_temp = theta_sig.fprop([m_theta_4_temp], params)

    m_recon = Gaussian(m_x, m_theta_mu_temp, m_theta_sig_temp)
    m_recon_term = m_recon.mean()
    m_recon_term.name = 'nll'

    max_x = m_x.max()
    mean_x = m_x.mean()
    min_x = m_x.min()
    max_x.name = 'max_x'
    mean_x.name = 'mean_x'
    min_x.name = 'min_x'

    max_theta_mu = m_theta_mu_temp.max()
    mean_theta_mu = m_theta_mu_temp.mean()
    min_theta_mu = m_theta_mu_temp.min()
    max_theta_mu.name = 'max_theta_mu'
    mean_theta_mu.name = 'mean_theta_mu'
    min_theta_mu.name = 'min_theta_mu'

    max_theta_sig = m_theta_sig_temp.max()
    mean_theta_sig = m_theta_sig_temp.mean()
    min_theta_sig = m_theta_sig_temp.min()
    max_theta_sig.name = 'max_theta_sig'
    mean_theta_sig.name = 'mean_theta_sig'
    min_theta_sig.name = 'min_theta_sig'

    model.inputs = [x]
    model.params = params
    model.nodes = nodes
    model.set_updates(shared_updates)

    optimizer = Adam(lr=lr)

    monitor_fn = theano.function(inputs=[m_x],
                                 outputs=[m_recon_term,
                                          max_theta_sig, mean_theta_sig, min_theta_sig,
                                          max_x, mean_x, min_x,
                                          max_theta_mu, mean_theta_mu, min_theta_mu],
                                 on_unused_input='ignore')

    extension = [
        GradientClipping(batch_size=batch_size, check_nan=1),
        EpochCount(epoch),
        Monitoring(freq=monitoring_freq,
                   monitor_fn=monitor_fn,
                   ddout=[m_recon_term,
                          max_theta_sig, mean_theta_sig, min_theta_sig,
                          max_x, mean_x, min_x,
                          max_theta_mu, mean_theta_mu, min_theta_mu],
                   data=[Iterator(train_data, m_batch_size, start=0, end=112640),
                         Iterator(valid_data, m_batch_size, start=2040064, end=2152704)]),
        Picklize(freq=monitoring_freq, force_save_freq=force_saving_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq, force_save_freq=force_saving_freq, path=save_path, channel=channel_name),
        WeightNorm()
    ]

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size, start=0, end=2040064),
                        model=model,
                        optimizer=optimizer,
                        cost=recon_term,
                        outputs=[recon_term],
                        extension=extension)
    mainloop.run()
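# ---------------------------------------------------------------------------
# The step_count/reset_freq bookkeeping above implements truncated BPTT with
# state carry-over: the last LSTM state of one window seeds the next window,
# and the state is re-zeroed every reset_freq updates. A toy pure-Python
# sketch of that control flow (an illustration, not library code):
# ---------------------------------------------------------------------------
import numpy as np

def run_tbptt_windows(n_updates, reset_freq, batch_size, rnn_dim, step_fn):
    rnn_tm1 = np.zeros((batch_size, 2 * rnn_dim))      # stacked [h, c]
    for step_count in range(n_updates):
        if step_count % reset_freq == 0:
            s_0 = np.zeros((batch_size, 2 * rnn_dim))  # fresh init state
        else:
            s_0 = rnn_tm1                              # carry state over
        rnn_tm1 = step_fn(s_0)                         # one truncated window
    return rnn_tm1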
def main(args):
    theano.optimizer = 'fast_compile'
    theano.config.exception_verbosity = 'high'

    trial = int(args['trial'])
    pkl_name = 'vrnn_gmm_%d' % trial
    channel_name = 'mse_val'

    data_path = args['data_path']
    save_path = args['save_path']  #+'/gmm/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    flgMSE = int(args['flgMSE'])
    genCase = int(args['genCase'])
    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = n_steps  # int(args['stride_test'])
    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    flgAgg = int(args['flgAgg'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  # a mixture of K Gaussian functions
    lr = float(args['lr'])
    debug = int(args['debug'])
    num_sequences_per_batch = int(args['numSequences'])  # based on appliance
    loadParam = args['loadAsKelly']
    target_inclusion_prob = float(args['target_inclusion_prob'])
    loadAsKelly = True
    if loadParam in ('N', 'n', 'no', 'NO', 'No'):
        loadAsKelly = False

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 60   #150
    p_z_dim = 60   #150
    p_x_dim = 30   #250
    x2s_dim = 40   #250
    z2s_dim = 40   #150
    target_dim = k  #x_dim #(x_dim-1)*k

    model = Model()
    Xtrain, ytrain, Xval, yval, reader = fetch_ukdale(data_path, windows, appliances, numApps=flgAgg, period=period, n_steps=n_steps, stride_train=stride_train, stride_test=stride_test, flgAggSumScaled=1, flgFilterZeros=1, isKelly=loadAsKelly, seq_per_batch=num_sequences_per_batch, target_inclusion_prob=target_inclusion_prob)

    # For now use a hard-coded instancesPlot for Kelly-style sampling.
    instancesPlot = {0: [10, 20], 2: [20, 30]}
    if not loadAsKelly:
        instancesPlot = reader.build_dict_instances_plot(listDates, batch_size, Xval.shape[0])

    # NOTE: x and y are switched here; the network is fed the appliance
    # signal and labeled with the aggregate.
    train_data = UKdale(name='train',
                        prep='normalize',
                        cond=False,
                        #path=data_path,
                        validTime=0,
                        inputX=ytrain,
                        labels=Xtrain)
    X_mean = train_data.X_mean
    X_std = train_data.X_std
    valid_data = UKdale(name='valid',
                        prep='normalize',
                        cond=False,
                        #path=data_path,
                        X_mean=X_mean,
                        X_std=X_std,
                        validTime=1,
                        inputX=yval,
                        labels=Xval)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask = train_data.theano_vars()
    #valTime = train_data.theano_valTime_vars()

    if genCase == 1:
        # Next-step prediction: shift target by one step.
        inputX = x[:-1, :]
        targetX = x[1:, :]
        n_steps = n_steps - 1
    else:
        inputX = x
        targetX = x
    inputX.name = 'x_original'

    if debug:
        inputX.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1', parent=['x_t'], parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b)
    z_1 = FullyConnectedLayer(name='z_1', parent=['z_t'], parent_dim=[z_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b)
    rnn = LSTM(name='rnn', parent=['x_1', 'z_1'], parent_dim=[x2s_dim, z2s_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b)
    phi_1 = FullyConnectedLayer(name='phi_1', parent=['x_1', 's_tm1'], parent_dim=[x2s_dim, rnn_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b)
    phi_mu = FullyConnectedLayer(name='phi_mu', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b)
    phi_sig = FullyConnectedLayer(name='phi_sig', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig)
    prior_1 = FullyConnectedLayer(name='prior_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b)
    prior_mu = FullyConnectedLayer(name='prior_mu', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b)
    prior_sig = FullyConnectedLayer(name='prior_sig', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig)
    theta_1 = FullyConnectedLayer(name='theta_1', parent=['z_1', 's_tm1'], parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b)
    theta_mu = FullyConnectedLayer(name='theta_mu', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b)
    theta_sig = FullyConnectedLayer(name='theta_sig', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig)
    coeff = FullyConnectedLayer(name='coeff', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b)
    corr = FullyConnectedLayer(name='corr', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='tanh', init_W=init_W, init_b=init_b)
    binary = FullyConnectedLayer(name='binary', parent=['theta_1'], parent_dim=[p_x_dim], nout=1, unit='sigmoid', init_W=init_W, init_b=init_b)

    nodes = [rnn,
             x_1, z_1,  #dissag_pred,
             phi_1, phi_mu, phi_sig,
             prior_1, prior_mu, prior_sig,
             theta_1, theta_mu, theta_sig, coeff]  #, corr, binary

    params = OrderedDict()
    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())  # creates the weight matrices
    params = init_tparams(params)  # makes the parameters theano.shared

    s_0_tr = rnn.get_init_state(batch_size)
    s_0_val = T.zeros((batch_size, 2 * rnn_dim), dtype=theano.config.floatX)
    # Unbroadcast every dimension so scan does not complain about
    # broadcastable mismatches with the step output.
    s_0_val = T.unbroadcast(s_0_val, *range(s_0_val.ndim))

    #x_1_temp = x_1.fprop([x], params)

    def inner_val_fn(s_tm1):
        '''
        phi_1_t = phi_1.fprop([x_t, s_tm1], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)
        '''
        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(prior_mu_t, prior_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)
        coeff_t = coeff.fprop([theta_1_t], params)

        x_t = GMM_sample(theta_mu_t, theta_sig_t, coeff_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)
        x_1_t = x_1.fprop([x_t], params)
        s_t = rnn.fprop([[x_1_t, z_1_t], [s_tm1]], params)

        return s_t, x_t, z_t, theta_1_t, theta_mu_t, theta_sig_t, coeff_t  # prior_mu_temp_val, prior_sig_temp_val

    ((s_temp_val, prediction_val, z_t_temp_val, theta_1_temp_val, theta_mu_temp_val, theta_sig_temp_val, coeff_temp_val), updates_val) =\
        theano.scan(fn=inner_val_fn,
                    n_steps=n_steps,  # already 1 subtracted if doing next-step prediction
                    outputs_info=[s_0_val, None, None, None, None, None, None])

    for key, value in updates_val.iteritems():
        key.default_update = value

    def inner_train_fn(x_t, s_tm1):
        x_1_t = x_1.fprop([x_t], params)

        phi_1_t = phi_1.fprop([x_1_t, s_tm1], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(phi_mu_t, phi_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)
        coeff_t = coeff.fprop([theta_1_t], params)
        #corr_t = corr.fprop([theta_1_t], params)
        #binary_t = binary.fprop([theta_1_t], params)

        pred = GMM_sample(theta_mu_t, theta_sig_t, coeff_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)
        s_t = rnn.fprop([[x_1_t, z_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_t, z_1_t, theta_1_t, theta_mu_t, theta_sig_t, coeff_t, pred  #, y_pred #corr_temp, binary_temp

    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp, z_t_temp, z_1_temp, theta_1_temp, theta_mu_temp, theta_sig_temp, coeff_temp, prediction), updates) =\
        theano.scan(fn=inner_train_fn,
                    sequences=[inputX],  #[x_1_temp]
                    outputs_info=[s_0_tr, None, None, None, None, None, None, None, None, None, None, None])

    for key, value in updates.iteritems():
        key.default_update = value

    ######### TRAINING GRAPH #########
    # Prepend s_0 so that s_temp[t] is the state *before* emitting step t.
    s_temp = concatenate([s_0_tr[None, :, :], s_temp[:-1]], axis=0)
    s_temp.name = 'h_1'  #gisse
    z_1_temp.name = 'z_1'  #gisse
    z_t_temp.name = 'z'
    theta_mu_temp.name = 'theta_mu_temp'
    theta_sig_temp.name = 'theta_sig_temp'
    coeff_temp.name = 'coeff'
    prediction.name = 'pred_' + str(flgAgg)

    mse = T.mean((prediction - targetX)**2)  # with axis=None the mean is over all elements
    mae = T.mean(T.abs_(prediction - targetX))
    mse.name = 'mse'
    mae.name = 'mae'

    x_in = inputX.reshape((batch_size * n_steps, -1))

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp)

    target_shape = x[:, 1:].shape
    theta_mu_in = theta_mu_temp.reshape((batch_size * n_steps, -1))
    theta_sig_in = theta_sig_temp.reshape((batch_size * n_steps, -1))
    coeff_in = coeff_temp.reshape((batch_size * n_steps, -1))

    recon = GMM(x_in, theta_mu_in, theta_sig_in, coeff_in)  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon = recon.reshape((batch_size, n_steps))
    recon.name = 'gmm_out'
    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    nll_upper_bound = recon_term + kl_term  #+ mse
    if flgMSE:
        nll_upper_bound = nll_upper_bound + mse
    nll_upper_bound.name = 'nll_upper_bound'

    ######### TESTING GRAPH #########
    s_temp_val = concatenate([s_0_val[None, :, :], s_temp_val[:-1]], axis=0)
    s_temp_val.name = 'h_1_val'  #gisse
    #z_1_temp_val.name = 'z_1_val'  #gisse
    z_t_temp_val.name = 'z_val'
    theta_mu_temp_val.name = 'theta_mu_temp_val'
    theta_sig_temp_val.name = 'theta_sig_temp_val'
    coeff_temp_val.name = 'coeff_val'
    prediction_val.name = 'generated_' + str(flgAgg)

    mse_val = T.mean((prediction_val - targetX)**2)  # with axis=None the mean is over all elements
    mae_val = T.mean(T.abs_(prediction_val - targetX))
    mse_val.name = 'mse_val'
    mae_val.name = 'mae_val'

    x_in_val = inputX.reshape((batch_size * n_steps, -1))

    # No KL here: the validation graph samples from the prior and never
    # computes the posterior phi.
    #kl_temp_val = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp)

    theta_mu_in_val = theta_mu_temp_val.reshape((batch_size * n_steps, -1))
    theta_sig_in_val = theta_sig_temp_val.reshape((batch_size * n_steps, -1))
    coeff_in_val = coeff_temp_val.reshape((batch_size * n_steps, -1))

    recon_val = GMM(x_in_val, theta_mu_in_val, theta_sig_in_val, coeff_in_val)  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon_val = recon_val.reshape((batch_size, n_steps))
    recon_val.name = 'gmm_out_val'
    recon_term_val = recon_val.sum(axis=0).mean()
    recon_term_val.name = 'recon_term_val'
    ######################################

    model.inputs = [x, mask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)
    header = "epoch,log,kl,nll,mse,mae\n"
    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch, save_path, header),
        Monitoring(freq=monitoring_freq,
                   ddout=[recon_term_val, mse_val, mae_val, prediction_val, theta_mu_temp_val],
                   indexSep=3,
                   indexDDoutPlot=[(0, prediction_val)],
                   instancesPlot=instancesPlot,  #, 80, 150
                   data=[Iterator(valid_data, batch_size)],
                   savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq, path=save_path, channel=channel_name),
        WeightNorm()
    ]
    lr_iterations = {0: lr}

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=nll_upper_bound,
                        outputs=[recon_term, kl_term, nll_upper_bound, mse, mae],
                        extension=extension,
                        lr_iterations=lr_iterations)
    mainloop.run()

    fLog = open(save_path + '/output.csv', 'w')
    fLog.write(str(lr_iterations) + "\n")
    fLog.write(str(windows) + "\n")
    fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,z2s_dim\n")
    fLog.write("{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim, z2s_dim))
    header = "epoch,log,mse,mae\n"
    fLog.write(header)
    for i, item in enumerate(mainloop.trainlog.monitor['recon_term_val']):
        f = mainloop.trainlog.monitor['epoch'][i]
        a = mainloop.trainlog.monitor['recon_term_val'][i]
        d = mainloop.trainlog.monitor['mse_val'][i]
        e = mainloop.trainlog.monitor['mae_val'][i]
        fLog.write("{},{},{},{}\n".format(f, a, d, e))
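# ---------------------------------------------------------------------------
# The GMM cost used above is the negative log-likelihood of a diagonal-
# covariance Gaussian mixture. A numpy sketch under assumed shape
# conventions (N flattened time-batch steps, D output dims, K components):
# ---------------------------------------------------------------------------
import numpy as np

def gmm_nll_sketch(x, mu, sig, coeff):
    # x: (N, D); mu, sig: (N, D, K); coeff: (N, K) mixture weights.
    x = x[:, :, None]
    log_comp = -0.5 * (np.log(2 * np.pi) + 2 * np.log(sig)
                       + ((x - mu) / sig) ** 2).sum(axis=1)    # (N, K)
    log_mix = log_comp + np.log(coeff)
    m = log_mix.max(axis=1, keepdims=True)                     # logsumexp trick
    return -(m[:, 0] + np.log(np.exp(log_mix - m).sum(axis=1)))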
def main(args): trial = int(args["trial"]) pkl_name = "vrnn_gauss_%d" % trial channel_name = "valid_nll_upper_bound" data_path = args["data_path"] save_path = args["save_path"] data_path = os.path.expanduser(args["data_path"]) save_path = os.path.expanduser(args["save_path"]) monitoring_freq = int(args["monitoring_freq"]) force_saving_freq = int(args["force_saving_freq"]) reset_freq = int(args["reset_freq"]) epoch = int(args["epoch"]) batch_size = int(args["batch_size"]) m_batch_size = int(args["m_batch_size"]) x_dim = int(args["x_dim"]) z_dim = int(args["z_dim"]) rnn_dim = int(args["rnn_dim"]) lr = float(args["lr"]) debug = int(args["debug"]) print "trial no. %d" % trial print "batch size %d" % batch_size print "learning rate %f" % lr print "saving pkl file '%s'" % pkl_name print "to the save path '%s'" % save_path q_z_dim = 500 p_z_dim = 500 p_x_dim = 600 x2s_dim = 600 z2s_dim = 500 target_dim = x_dim file_name = "blizzard_tbptt" normal_params = np.load(data_path + file_name + "_normal.npz") X_mean = normal_params["X_mean"] X_std = normal_params["X_std"] model = Model() train_data = Blizzard_tbptt( name="train", path=data_path, frame_size=x_dim, file_name=file_name, X_mean=X_mean, X_std=X_std ) valid_data = Blizzard_tbptt( name="valid", path=data_path, frame_size=x_dim, file_name=file_name, X_mean=X_mean, X_std=X_std ) x = train_data.theano_vars() m_x = valid_data.theano_vars() if debug: x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=theano.config.floatX) m_x.tag.test_value = np.zeros((15, m_batch_size, x_dim), dtype=theano.config.floatX) init_W = InitCell("rand") init_U = InitCell("ortho") init_b = InitCell("zeros") init_b_sig = InitCell("const", mean=0.6) x_1 = FullyConnectedLayer( name="x_1", parent=["x_t"], parent_dim=[x_dim], nout=x2s_dim, unit="relu", init_W=init_W, init_b=init_b ) x_2 = FullyConnectedLayer( name="x_2", parent=["x_1"], parent_dim=[x2s_dim], nout=x2s_dim, unit="relu", init_W=init_W, init_b=init_b ) x_3 = FullyConnectedLayer( name="x_3", parent=["x_2"], parent_dim=[x2s_dim], nout=x2s_dim, unit="relu", init_W=init_W, init_b=init_b ) x_4 = FullyConnectedLayer( name="x_4", parent=["x_3"], parent_dim=[x2s_dim], nout=x2s_dim, unit="relu", init_W=init_W, init_b=init_b ) z_1 = FullyConnectedLayer( name="z_1", parent=["z_t"], parent_dim=[z_dim], nout=z2s_dim, unit="relu", init_W=init_W, init_b=init_b ) z_2 = FullyConnectedLayer( name="z_2", parent=["z_1"], parent_dim=[z2s_dim], nout=z2s_dim, unit="relu", init_W=init_W, init_b=init_b ) z_3 = FullyConnectedLayer( name="z_3", parent=["z_2"], parent_dim=[z2s_dim], nout=z2s_dim, unit="relu", init_W=init_W, init_b=init_b ) z_4 = FullyConnectedLayer( name="z_4", parent=["z_3"], parent_dim=[z2s_dim], nout=z2s_dim, unit="relu", init_W=init_W, init_b=init_b ) rnn = LSTM( name="rnn", parent=["x_4", "z_4"], parent_dim=[x2s_dim, z2s_dim], nout=rnn_dim, unit="tanh", init_W=init_W, init_U=init_U, init_b=init_b, ) phi_1 = FullyConnectedLayer( name="phi_1", parent=["x_4", "s_tm1"], parent_dim=[x2s_dim, rnn_dim], nout=q_z_dim, unit="relu", init_W=init_W, init_b=init_b, ) phi_2 = FullyConnectedLayer( name="phi_2", parent=["phi_1"], parent_dim=[q_z_dim], nout=q_z_dim, unit="relu", init_W=init_W, init_b=init_b ) phi_3 = FullyConnectedLayer( name="phi_3", parent=["phi_2"], parent_dim=[q_z_dim], nout=q_z_dim, unit="relu", init_W=init_W, init_b=init_b ) phi_4 = FullyConnectedLayer( name="phi_4", parent=["phi_3"], parent_dim=[q_z_dim], nout=q_z_dim, unit="relu", init_W=init_W, init_b=init_b ) phi_mu = FullyConnectedLayer( name="phi_mu", 
parent=["phi_4"], parent_dim=[q_z_dim], nout=z_dim, unit="linear", init_W=init_W, init_b=init_b ) phi_sig = FullyConnectedLayer( name="phi_sig", parent=["phi_4"], parent_dim=[q_z_dim], nout=z_dim, unit="softplus", cons=1e-4, init_W=init_W, init_b=init_b_sig, ) prior_1 = FullyConnectedLayer( name="prior_1", parent=["s_tm1"], parent_dim=[rnn_dim], nout=p_z_dim, unit="relu", init_W=init_W, init_b=init_b ) prior_2 = FullyConnectedLayer( name="prior_2", parent=["prior_1"], parent_dim=[p_z_dim], nout=p_z_dim, unit="relu", init_W=init_W, init_b=init_b, ) prior_3 = FullyConnectedLayer( name="prior_3", parent=["prior_2"], parent_dim=[p_z_dim], nout=p_z_dim, unit="relu", init_W=init_W, init_b=init_b, ) prior_4 = FullyConnectedLayer( name="prior_4", parent=["prior_3"], parent_dim=[p_z_dim], nout=p_z_dim, unit="relu", init_W=init_W, init_b=init_b, ) prior_mu = FullyConnectedLayer( name="prior_mu", parent=["prior_4"], parent_dim=[p_z_dim], nout=z_dim, unit="linear", init_W=init_W, init_b=init_b, ) prior_sig = FullyConnectedLayer( name="prior_sig", parent=["prior_4"], parent_dim=[p_z_dim], nout=z_dim, unit="softplus", cons=1e-4, init_W=init_W, init_b=init_b_sig, ) theta_1 = FullyConnectedLayer( name="theta_1", parent=["z_4", "s_tm1"], parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim, unit="relu", init_W=init_W, init_b=init_b, ) theta_2 = FullyConnectedLayer( name="theta_2", parent=["theta_1"], parent_dim=[p_x_dim], nout=p_x_dim, unit="relu", init_W=init_W, init_b=init_b, ) theta_3 = FullyConnectedLayer( name="theta_3", parent=["theta_2"], parent_dim=[p_x_dim], nout=p_x_dim, unit="relu", init_W=init_W, init_b=init_b, ) theta_4 = FullyConnectedLayer( name="theta_4", parent=["theta_3"], parent_dim=[p_x_dim], nout=p_x_dim, unit="relu", init_W=init_W, init_b=init_b, ) theta_mu = FullyConnectedLayer( name="theta_mu", parent=["theta_4"], parent_dim=[p_x_dim], nout=target_dim, unit="linear", init_W=init_W, init_b=init_b, ) theta_sig = FullyConnectedLayer( name="theta_sig", parent=["theta_4"], parent_dim=[p_x_dim], nout=target_dim, unit="softplus", cons=1e-4, init_W=init_W, init_b=init_b_sig, ) nodes = [ rnn, x_1, x_2, x_3, x_4, z_1, z_2, z_3, z_4, phi_1, phi_2, phi_3, phi_4, phi_mu, phi_sig, prior_1, prior_2, prior_3, prior_4, prior_mu, prior_sig, theta_1, theta_2, theta_3, theta_4, theta_mu, theta_sig, ] params = OrderedDict() for node in nodes: if node.initialize() is not None: params.update(node.initialize()) params = init_tparams(params) step_count = sharedX(0, name="step_count") last_rnn = np.zeros((batch_size, rnn_dim * 2), dtype=theano.config.floatX) rnn_tm1 = sharedX(last_rnn, name="rnn_tm1") shared_updates = OrderedDict() shared_updates[step_count] = step_count + 1 s_0 = T.switch(T.eq(T.mod(step_count, reset_freq), 0), rnn.get_init_state(batch_size), rnn_tm1) x_1_temp = x_1.fprop([x], params) x_2_temp = x_2.fprop([x_1_temp], params) x_3_temp = x_3.fprop([x_2_temp], params) x_4_temp = x_4.fprop([x_3_temp], params) def inner_fn(x_t, s_tm1): phi_1_t = phi_1.fprop([x_t, s_tm1], params) phi_2_t = phi_2.fprop([phi_1_t], params) phi_3_t = phi_3.fprop([phi_2_t], params) phi_4_t = phi_4.fprop([phi_3_t], params) phi_mu_t = phi_mu.fprop([phi_4_t], params) phi_sig_t = phi_sig.fprop([phi_4_t], params) prior_1_t = prior_1.fprop([s_tm1], params) prior_2_t = prior_2.fprop([prior_1_t], params) prior_3_t = prior_3.fprop([prior_2_t], params) prior_4_t = prior_4.fprop([prior_3_t], params) prior_mu_t = prior_mu.fprop([prior_4_t], params) prior_sig_t = prior_sig.fprop([prior_4_t], params) z_t = Gaussian_sample(phi_mu_t, 
phi_sig_t) z_1_t = z_1.fprop([z_t], params) z_2_t = z_2.fprop([z_1_t], params) z_3_t = z_3.fprop([z_2_t], params) z_4_t = z_4.fprop([z_3_t], params) s_t = rnn.fprop([[x_t, z_4_t], [s_tm1]], params) return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_4_t, z_t ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp, z_4_temp, z_t), updates) = theano.scan( fn=inner_fn, sequences=[x_4_temp], outputs_info=[s_0, None, None, None, None, None, None] ) for k, v in updates.iteritems(): k.default_update = v shared_updates[rnn_tm1] = s_temp[-1] s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0) theta_1_temp = theta_1.fprop([z_4_temp, s_temp], params) theta_2_temp = theta_2.fprop([theta_1_temp], params) theta_3_temp = theta_3.fprop([theta_2_temp], params) theta_4_temp = theta_4.fprop([theta_3_temp], params) theta_mu_temp = theta_mu.fprop([theta_4_temp], params) theta_sig_temp = theta_sig.fprop([theta_4_temp], params) kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp) #### recon = Gaussian(x, theta_mu_temp, theta_sig_temp) recon = Gaussian(x, theta_mu_temp, theta_sig_temp) - Gaussian(z_t, phi_mu_temp, phi_sig_temp) recon += Gaussian(z_t, prior_mu_temp, prior_sig_temp) recon_term = recon.mean() / 5.0 kl_term = kl_temp.mean() ##### nll_upper_bound = recon_term + kl_term nll_upper_bound = recon_term nll_upper_bound.name = "nll_upper_bound" m_x_1_temp = x_1.fprop([m_x], params) m_x_2_temp = x_2.fprop([m_x_1_temp], params) m_x_3_temp = x_3.fprop([m_x_2_temp], params) m_x_4_temp = x_4.fprop([m_x_3_temp], params) m_s_0 = rnn.get_init_state(m_batch_size) ( (m_s_temp, m_phi_mu_temp, m_phi_sig_temp, m_prior_mu_temp, m_prior_sig_temp, m_z_4_temp, m_z_t), m_updates, ) = theano.scan(fn=inner_fn, sequences=[m_x_4_temp], outputs_info=[m_s_0, None, None, None, None, None, None]) for k, v in m_updates.iteritems(): k.default_update = v m_s_temp = concatenate([m_s_0[None, :, :], m_s_temp[:-1]], axis=0) m_theta_1_temp = theta_1.fprop([m_z_4_temp, m_s_temp], params) m_theta_2_temp = theta_2.fprop([m_theta_1_temp], params) m_theta_3_temp = theta_3.fprop([m_theta_2_temp], params) m_theta_4_temp = theta_4.fprop([m_theta_3_temp], params) m_theta_mu_temp = theta_mu.fprop([m_theta_4_temp], params) m_theta_sig_temp = theta_sig.fprop([m_theta_4_temp], params) m_kl_temp = KLGaussianGaussian(m_phi_mu_temp, m_phi_sig_temp, m_prior_mu_temp, m_prior_sig_temp) m_recon = Gaussian(m_x, m_theta_mu_temp, m_theta_sig_temp) m_recon_term = m_recon.mean() m_kl_term = m_kl_temp.mean() m_nll_upper_bound = m_recon_term + m_kl_term m_nll_upper_bound.name = "nll_upper_bound" m_recon_term.name = "recon_term" m_kl_term.name = "kl_term" max_x = m_x.max() mean_x = m_x.mean() min_x = m_x.min() max_x.name = "max_x" mean_x.name = "mean_x" min_x.name = "min_x" max_theta_mu = m_theta_mu_temp.max() mean_theta_mu = m_theta_mu_temp.mean() min_theta_mu = m_theta_mu_temp.min() max_theta_mu.name = "max_theta_mu" mean_theta_mu.name = "mean_theta_mu" min_theta_mu.name = "min_theta_mu" max_theta_sig = m_theta_sig_temp.max() mean_theta_sig = m_theta_sig_temp.mean() min_theta_sig = m_theta_sig_temp.min() max_theta_sig.name = "max_theta_sig" mean_theta_sig.name = "mean_theta_sig" min_theta_sig.name = "min_theta_sig" max_phi_sig = m_phi_sig_temp.max() mean_phi_sig = m_phi_sig_temp.mean() min_phi_sig = m_phi_sig_temp.min() max_phi_sig.name = "max_phi_sig" mean_phi_sig.name = "mean_phi_sig" min_phi_sig.name = "min_phi_sig" max_prior_sig = m_prior_sig_temp.max() mean_prior_sig = m_prior_sig_temp.mean() 
min_prior_sig = m_prior_sig_temp.min() max_prior_sig.name = "max_prior_sig" mean_prior_sig.name = "mean_prior_sig" min_prior_sig.name = "min_prior_sig" model.inputs = [x] model.params = params model.nodes = nodes model.set_updates(shared_updates) optimizer = Adam(lr=lr) monitor_fn = theano.function( inputs=[m_x], outputs=[ m_nll_upper_bound, m_recon_term, m_kl_term, max_phi_sig, mean_phi_sig, min_phi_sig, max_prior_sig, mean_prior_sig, min_prior_sig, max_theta_sig, mean_theta_sig, min_theta_sig, max_x, mean_x, min_x, max_theta_mu, mean_theta_mu, min_theta_mu, ], on_unused_input="ignore", ) extension = [ GradientClipping(batch_size=batch_size, check_nan=1), EpochCount(epoch), Monitoring( freq=monitoring_freq, monitor_fn=monitor_fn, ddout=[ m_nll_upper_bound, m_recon_term, m_kl_term, max_phi_sig, mean_phi_sig, min_phi_sig, max_prior_sig, mean_prior_sig, min_prior_sig, max_theta_sig, mean_theta_sig, min_theta_sig, max_x, mean_x, min_x, max_theta_mu, mean_theta_mu, min_theta_mu, ], data=[ Iterator(train_data, m_batch_size, start=0, end=112640), Iterator(valid_data, m_batch_size, start=2040064, end=2152704), ], ), Picklize(freq=monitoring_freq, force_save_freq=force_saving_freq, path=save_path), EarlyStopping(freq=monitoring_freq, force_save_freq=force_saving_freq, path=save_path, channel=channel_name), WeightNorm(), ] mainloop = Training( name=pkl_name, data=KIter(train_data, batch_size, start=0, end=2040064), model=model, optimizer=optimizer, cost=nll_upper_bound, outputs=[nll_upper_bound], extension=extension, ) mainloop.run()
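# ---------------------------------------------------------------------------
# The experimental recon block above swaps the closed-form KL for a sampled
# one. Assuming Gaussian(...) returns a negative log-density, the quantity
# Gaussian(x, theta) - Gaussian(z, phi) + Gaussian(z, prior) is a one-sample
# estimate of -log p(x|z) - log p(z) + log q(z|x), i.e. the negative ELBO
# with the KL term estimated at the sampled z (the /5.0 looks like an ad-hoc
# scaling constant). A numpy sketch of that identity under those assumptions:
# ---------------------------------------------------------------------------
import numpy as np

def neg_log_gauss(x, mu, sig):
    return 0.5 * (np.log(2 * np.pi) + 2 * np.log(sig)
                  + ((x - mu) / sig) ** 2).sum(axis=-1)

def one_sample_neg_elbo(x, z, dec_mu, dec_sig, q_mu, q_sig, p_mu, p_sig):
    # z is assumed drawn from q; the bracketed pair is the sampled KL term.
    return (neg_log_gauss(x, dec_mu, dec_sig)
            + (neg_log_gauss(z, p_mu, p_sig) - neg_log_gauss(z, q_mu, q_sig)))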
def main(args):
    trial = int(args['trial'])
    pkl_name = 'rnn_gauss_%d' % trial
    channel_name = 'valid_nll'

    data_path = args['data_path']
    save_path = args['save_path']

    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    lr = float(args['lr'])
    debug = int(args['debug'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    x2s_dim = 340
    s2x_dim = 340
    target_dim = x_dim - 1

    model = Model()
    train_data = IAMOnDB(name='train', prep='normalize', cond=False, path=data_path)
    X_mean = train_data.X_mean
    X_std = train_data.X_std
    valid_data = IAMOnDB(name='valid', prep='normalize', cond=False, path=data_path, X_mean=X_mean, X_std=X_std)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask = train_data.theano_vars()

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1', parent=['x_t'], parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b)
    rnn = LSTM(name='rnn', parent=['x_1'], parent_dim=[x2s_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b)
    theta_1 = FullyConnectedLayer(name='theta_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=s2x_dim, unit='relu', init_W=init_W, init_b=init_b)
    theta_mu = FullyConnectedLayer(name='theta_mu', parent=['theta_1'], parent_dim=[s2x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b)
    theta_sig = FullyConnectedLayer(name='theta_sig', parent=['theta_1'], parent_dim=[s2x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig)
    corr = FullyConnectedLayer(name='corr', parent=['theta_1'], parent_dim=[s2x_dim], nout=1, unit='tanh', init_W=init_W, init_b=init_b)
    binary = FullyConnectedLayer(name='binary', parent=['theta_1'], parent_dim=[s2x_dim], nout=1, unit='sigmoid', init_W=init_W, init_b=init_b)

    nodes = [rnn, x_1, theta_1, theta_mu, theta_sig, corr, binary]

    params = OrderedDict()
    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())
    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)
    x_1_temp = x_1.fprop([x], params)

    def inner_fn(x_t, s_tm1):
        s_t = rnn.fprop([[x_t], [s_tm1]], params)
        return s_t

    (s_temp, updates) = theano.scan(fn=inner_fn, sequences=[x_1_temp], outputs_info=[s_0])

    for key, value in updates.iteritems():
        key.default_update = value

    s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)
    theta_1_temp = theta_1.fprop([s_temp], params)
    theta_mu_temp = theta_mu.fprop([theta_1_temp], params)
    theta_sig_temp = theta_sig.fprop([theta_1_temp], params)
    corr_temp = corr.fprop([theta_1_temp], params)
    binary_temp = binary.fprop([theta_1_temp], params)

    x_shape = x.shape
    x_in = x.reshape((x_shape[0] * x_shape[1], -1))
    theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1))
    corr_in = corr_temp.reshape((x_shape[0] * x_shape[1], -1))
    binary_in = binary_temp.reshape((x_shape[0] * x_shape[1], -1))

    recon = BiGauss(x_in, theta_mu_in, theta_sig_in, corr_in, binary_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon = recon * mask
    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'nll'

    max_x = x.max()
    mean_x = x.mean()
    min_x = x.min()
    max_x.name = 'max_x'
    mean_x.name = 'mean_x'
    min_x.name = 'min_x'

    max_theta_mu = theta_mu_in.max()
    mean_theta_mu = theta_mu_in.mean()
    min_theta_mu = theta_mu_in.min()
    max_theta_mu.name = 'max_theta_mu'
    mean_theta_mu.name = 'mean_theta_mu'
    min_theta_mu.name = 'min_theta_mu'

    max_theta_sig = theta_sig_in.max()
    mean_theta_sig = theta_sig_in.mean()
    min_theta_sig = theta_sig_in.min()
    max_theta_sig.name = 'max_theta_sig'
    mean_theta_sig.name = 'mean_theta_sig'
    min_theta_sig.name = 'min_theta_sig'

    model.inputs = [x, mask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch),
        Monitoring(freq=monitoring_freq,
                   ddout=[recon_term,
                          max_theta_sig, mean_theta_sig, min_theta_sig,
                          max_x, mean_x, min_x,
                          max_theta_mu, mean_theta_mu, min_theta_mu],
                   data=[Iterator(valid_data, batch_size)]),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq, path=save_path),
        WeightNorm()
    ]

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=recon_term,
                        outputs=[recon_term],
                        extension=extension)
    mainloop.run()
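# ---------------------------------------------------------------------------
# BiGauss is the handwriting likelihood of Graves-style sequence models: a
# correlated bivariate Gaussian over the pen offsets plus a Bernoulli over
# the pen-up bit. A numpy sketch under assumed argument shapes:
# ---------------------------------------------------------------------------
import numpy as np

def bigauss_nll_sketch(x, mu, sig, corr, binary):
    # x: (N, 3) rows of (dx, dy, pen); mu, sig: (N, 2); corr, binary: (N, 1).
    dx, dy, pen = x[:, 0], x[:, 1], x[:, 2]
    rho = corr[:, 0]
    zx = (dx - mu[:, 0]) / sig[:, 0]
    zy = (dy - mu[:, 1]) / sig[:, 1]
    z = zx ** 2 + zy ** 2 - 2.0 * rho * zx * zy
    log_gauss = -(np.log(2 * np.pi) + np.log(sig[:, 0]) + np.log(sig[:, 1])
                  + 0.5 * np.log(1.0 - rho ** 2) + z / (2.0 * (1.0 - rho ** 2)))
    log_pen = pen * np.log(binary[:, 0]) + (1.0 - pen) * np.log(1.0 - binary[:, 0])
    return -(log_gauss + log_pen)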
def main(args):
    theano.optimizer = 'fast_compile'
    #theano.config.exception_verbosity = 'high'

    trial = int(args['trial'])
    pkl_name = 'vrnn_gmm_%d' % trial
    channel_name = 'nll_upper_bound'

    data_path = args['data_path']
    save_path = args['save_path']  #+'/aggVSdisag_distrib/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = n_steps
    typeLoad = int(args['typeLoad'])
    flgMSE = int(args['flgMSE'])
    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  # a mixture of K Gaussian functions
    lr = float(args['lr'])
    origLR = lr
    debug = int(args['debug'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 350
    p_z_dim = 400
    p_x_dim = 450
    x2s_dim = 400
    y2s_dim = 200
    z2s_dim = 350
    # The appliances are separated into theta_mu1, theta_mu2, etc., so each
    # output is built from its own mixture of k Gaussians.
    target_dim = k

    model = Model()
    Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_ukdale(data_path, windows, appliances, numApps=-1, period=period, n_steps=n_steps, stride_train=stride_train, stride_test=stride_test, flgAggSumScaled=1, flgFilterZeros=1, typeLoad=typeLoad, trainPer=0.5, valPer=0.25, testPer=0.25)

    instancesPlot = {0: [5]}
    #instancesPlot = reader.build_dict_instances_plot(listDates, batch_size, Xval.shape[0])

    train_data = UKdale(name='train',
                        prep='normalize',
                        cond=True,  # False
                        #path=data_path,
                        inputX=Xtrain,
                        labels=ytrain)
    X_mean = train_data.X_mean
    X_std = train_data.X_std
    valid_data = UKdale(name='valid',
                        prep='normalize',
                        cond=True,  # False
                        #path=data_path,
                        X_mean=X_mean,
                        X_std=X_std,
                        inputX=Xval,
                        labels=yval)
    test_data = UKdale(name='valid',
                       prep='normalize',
                       cond=True,  # False
                       #path=data_path,
                       X_mean=X_mean,
                       X_std=X_std,
                       inputX=Xtest,
                       labels=ytest)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    scheduleSamplingMask = T.fvector('schedMask')
    x.name = 'x_original'

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1', parent=['x_t'], parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b)
    y_1 = FullyConnectedLayer(name='y_1', parent=['y_t'], parent_dim=[y_dim], nout=y2s_dim, unit='relu', init_W=init_W, init_b=init_b)
    z_1 = FullyConnectedLayer(name='z_1', parent=['z_t'], parent_dim=[z_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b)
    rnn = LSTM(name='rnn', parent=['x_1', 'z_1', 'y_1'], parent_dim=[x2s_dim, z2s_dim, y2s_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b)
    phi_1 = FullyConnectedLayer(name='phi_1', parent=['x_1', 's_tm1', 'y_1'], parent_dim=[x2s_dim, rnn_dim, y2s_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b)
    phi_mu = FullyConnectedLayer(name='phi_mu', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b)
    phi_sig = FullyConnectedLayer(name='phi_sig', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig)
    prior_1 = FullyConnectedLayer(name='prior_1', parent=['x_1', 's_tm1'], parent_dim=[x2s_dim, rnn_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b)
    prior_mu = FullyConnectedLayer(name='prior_mu', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b)
    prior_sig = FullyConnectedLayer(name='prior_sig', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig)
    theta_1 = FullyConnectedLayer(name='theta_1', parent=['z_1', 's_tm1'], parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b)
    theta_mu1 = FullyConnectedLayer(name='theta_mu1', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b)
    theta_mu2 = FullyConnectedLayer(name='theta_mu2', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b)
    theta_mu3 = FullyConnectedLayer(name='theta_mu3', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b)
    theta_mu4 = FullyConnectedLayer(name='theta_mu4', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b)
    theta_mu5 = FullyConnectedLayer(name='theta_mu5', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b)
    theta_sig1 = FullyConnectedLayer(name='theta_sig1', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig)
    theta_sig2 = FullyConnectedLayer(name='theta_sig2', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig)
    theta_sig3 = FullyConnectedLayer(name='theta_sig3', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig)
    theta_sig4 = FullyConnectedLayer(name='theta_sig4', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig)
    theta_sig5 = FullyConnectedLayer(name='theta_sig5', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig)
    coeff1 = FullyConnectedLayer(name='coeff1', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b)
    coeff2 = FullyConnectedLayer(name='coeff2', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b)
    coeff3 = FullyConnectedLayer(name='coeff3', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b)
    coeff4 = FullyConnectedLayer(name='coeff4', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b)
    coeff5 = FullyConnectedLayer(name='coeff5', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b)
    corr = FullyConnectedLayer(name='corr', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='tanh', init_W=init_W, init_b=init_b)
    binary = FullyConnectedLayer(name='binary', parent=['theta_1'], parent_dim=[p_x_dim], nout=1, unit='sigmoid', init_W=init_W, init_b=init_b)

    nodes = [rnn,
             x_1, y_1, z_1,  #dissag_pred,
             phi_1, phi_mu, phi_sig,
             prior_1, prior_mu, prior_sig,
             theta_1,
             theta_mu1, theta_sig1, coeff1,
             theta_mu2, theta_sig2, coeff2,
             theta_mu3, theta_sig3, coeff3,
             theta_mu4, theta_sig4, coeff4,
             theta_mu5, theta_sig5, coeff5]

    params = OrderedDict()
    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())
    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)
    y_1_temp = y_1.fprop([y], params)

    def inner_fn_test(x_t, s_tm1):
        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        # In the original code this is a Gaussian sample; GMM sampling is
        # used for the generated outputs below.
        z_t = Gaussian_sample(prior_mu_t, prior_sig_t)
        z_1_t = z_1.fprop([z_t], params)
        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)

        theta_mu1_t = theta_mu1.fprop([theta_1_t], params)
        theta_sig1_t = theta_sig1.fprop([theta_1_t], params)
        coeff1_t = coeff1.fprop([theta_1_t], params)
        y_pred1 = GMM_sampleY(theta_mu1_t, theta_sig1_t, coeff1_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)

        theta_mu2_t = theta_mu2.fprop([theta_1_t], params)
        theta_sig2_t = theta_sig2.fprop([theta_1_t], params)
        coeff2_t = coeff2.fprop([theta_1_t], params)
        y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t)
        y_pred1 = T.concatenate([y_pred1, y_pred2], axis=1)

        theta_mu3_t = theta_mu3.fprop([theta_1_t], params)
        theta_sig3_t = theta_sig3.fprop([theta_1_t], params)
        coeff3_t = coeff3.fprop([theta_1_t], params)
        y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t)
        y_pred1 = T.concatenate([y_pred1, y_pred3], axis=1)

        theta_mu4_t = theta_mu4.fprop([theta_1_t], params)
        theta_sig4_t = theta_sig4.fprop([theta_1_t], params)
        coeff4_t = coeff4.fprop([theta_1_t], params)
        y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t)
        y_pred1 = T.concatenate([y_pred1, y_pred4], axis=1)

        theta_mu5_t = theta_mu5.fprop([theta_1_t], params)
        theta_sig5_t = theta_sig5.fprop([theta_1_t], params)
        coeff5_t = coeff5.fprop([theta_1_t], params)
        y_pred5 = GMM_sampleY(theta_mu5_t, theta_sig5_t, coeff5_t)
        y_pred1 = T.concatenate([y_pred1, y_pred5], axis=1)

        pred_1_t = y_1.fprop([y_pred1], params)
        #y_pred = [GMM_sampleY(theta_mu_t[i], theta_sig_t[i], coeff_t[i]) for i in range(y_dim)]  #T.stack([y_pred1, y_pred2], axis=0)
        s_t = rnn.fprop([[x_t, z_1_t, pred_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return s_t, prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred1, theta_mu2_t, theta_sig2_t, coeff2_t, y_pred2, theta_mu3_t, theta_sig3_t, coeff3_t, y_pred3, theta_mu4_t, theta_sig4_t, coeff4_t, y_pred4, theta_mu5_t, theta_sig5_t, coeff5_t, y_pred5  #corr_temp, binary_temp

    ((s_temp_val, prior_mu_temp_val, prior_sig_temp_val, theta_mu1_temp_val,
theta_sig1_temp_val, coeff1_temp_val, y_pred1_temp_val, theta_mu2_temp_val, theta_sig2_temp_val, coeff2_temp_val, y_pred2_temp_val, theta_mu3_temp_val, theta_sig3_temp_val, coeff3_temp_val, y_pred3_temp_val, theta_mu4_temp_val, theta_sig4_temp_val, coeff4_temp_val, y_pred4_temp_val, theta_mu5_temp_val, theta_sig5_temp_val, coeff5_temp_val, y_pred5_temp_val), updates_val) = theano.scan(fn=inner_fn_test, sequences=[x_1_temp], outputs_info=[ s_0, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None ]) for k, v in updates_val.iteritems(): k.default_update = v def inner_fn(x_t, y_t, s_tm1): phi_1_t = phi_1.fprop([x_t, s_tm1, y_t], params) phi_mu_t = phi_mu.fprop([phi_1_t], params) phi_sig_t = phi_sig.fprop([phi_1_t], params) prior_1_t = prior_1.fprop([x_t, s_tm1], params) prior_mu_t = prior_mu.fprop([prior_1_t], params) prior_sig_t = prior_sig.fprop([prior_1_t], params) z_t = Gaussian_sample( phi_mu_t, phi_sig_t ) #in the original code it is gaussian. GMM is for the generation z_1_t = z_1.fprop([z_t], params) theta_1_t = theta_1.fprop([z_1_t, s_tm1], params) theta_mu1_t = theta_mu1.fprop([theta_1_t], params) theta_sig1_t = theta_sig1.fprop([theta_1_t], params) coeff1_t = coeff1.fprop([theta_1_t], params) y_pred1 = GMM_sampleY( theta_mu1_t, theta_sig1_t, coeff1_t) #Gaussian_sample(theta_mu_t, theta_sig_t) theta_mu2_t = theta_mu2.fprop([theta_1_t], params) theta_sig2_t = theta_sig2.fprop([theta_1_t], params) coeff2_t = coeff2.fprop([theta_1_t], params) y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t) theta_mu3_t = theta_mu3.fprop([theta_1_t], params) theta_sig3_t = theta_sig3.fprop([theta_1_t], params) coeff3_t = coeff3.fprop([theta_1_t], params) y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t) theta_mu4_t = theta_mu4.fprop([theta_1_t], params) theta_sig4_t = theta_sig4.fprop([theta_1_t], params) coeff4_t = coeff4.fprop([theta_1_t], params) y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t) theta_mu5_t = theta_mu5.fprop([theta_1_t], params) theta_sig5_t = theta_sig5.fprop([theta_1_t], params) coeff5_t = coeff5.fprop([theta_1_t], params) y_pred5 = GMM_sampleY(theta_mu5_t, theta_sig5_t, coeff5_t) s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params) return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred1, theta_mu2_t, theta_sig2_t, coeff2_t, y_pred2, theta_mu3_t, theta_sig3_t, coeff3_t, y_pred3, theta_mu4_t, theta_sig4_t, coeff4_t, y_pred4, theta_mu5_t, theta_sig5_t, coeff5_t, y_pred5 #corr_temp, binary_temp ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp, theta_mu1_temp, theta_sig1_temp, coeff1_temp, y_pred1_temp, theta_mu2_temp, theta_sig2_temp, coeff2_temp, y_pred2_temp, theta_mu3_temp, theta_sig3_temp, coeff3_temp, y_pred3_temp, theta_mu4_temp, theta_sig4_temp, coeff4_temp, y_pred4_temp, theta_mu5_temp, theta_sig5_temp, coeff5_temp, y_pred5_temp), updates) = theano.scan(fn=inner_fn, sequences=[x_1_temp, y_1_temp], outputs_info=[ s_0, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None ]) for k, v in updates.iteritems(): k.default_update = v theta_mu1_temp.name = 'theta_mu1' theta_sig1_temp.name = 'theta_sig1' coeff1_temp.name = 'coeff1' y_pred1_temp.name = 'disaggregation1' #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1) mse1 = T.mean((y_pred1_temp - y[:, :, 0].reshape( (y.shape[0], y.shape[1], 1)))**2) mae1 = T.mean( 
T.abs_(y_pred1_temp - y[:, :, 0].reshape((y.shape[0], y.shape[1], 1)))) mse1.name = 'mse1' mae1.name = 'mae1' kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp) x_shape = x.shape y_shape = y.shape theta_mu2_temp.name = 'theta_mu2' theta_sig2_temp.name = 'theta_sig2' coeff2_temp.name = 'coeff2' y_pred2_temp.name = 'disaggregation2' mse2 = T.mean((y_pred2_temp - y[:, :, 1].reshape( (y.shape[0], y.shape[1], 1)))**2) # axis=None: mean over all elements mae2 = T.mean( T.abs_(y_pred2_temp - y[:, :, 1].reshape((y.shape[0], y.shape[1], 1)))) mse2.name = 'mse2' mae2.name = 'mae2' theta_mu3_temp.name = 'theta_mu3' theta_sig3_temp.name = 'theta_sig3' coeff3_temp.name = 'coeff3' y_pred3_temp.name = 'disaggregation3' mse3 = T.mean((y_pred3_temp - y[:, :, 2].reshape( (y.shape[0], y.shape[1], 1)))**2) # axis=None: mean over all elements mae3 = T.mean( T.abs_(y_pred3_temp - y[:, :, 2].reshape((y.shape[0], y.shape[1], 1)))) mse3.name = 'mse3' mae3.name = 'mae3' theta_mu4_temp.name = 'theta_mu4' theta_sig4_temp.name = 'theta_sig4' coeff4_temp.name = 'coeff4' y_pred4_temp.name = 'disaggregation4' mse4 = T.mean((y_pred4_temp - y[:, :, 3].reshape( (y.shape[0], y.shape[1], 1)))**2) # axis=None: mean over all elements mae4 = T.mean( T.abs_(y_pred4_temp - y[:, :, 3].reshape((y.shape[0], y.shape[1], 1)))) mse4.name = 'mse4' mae4.name = 'mae4' theta_mu5_temp.name = 'theta_mu5' theta_sig5_temp.name = 'theta_sig5' coeff5_temp.name = 'coeff5' y_pred5_temp.name = 'disaggregation5' mse5 = T.mean((y_pred5_temp - y[:, :, 4].reshape( (y.shape[0], y.shape[1], 1)))**2) # axis=None: mean over all elements mae5 = T.mean( T.abs_(y_pred5_temp - y[:, :, 4].reshape((y.shape[0], y.shape[1], 1)))) mse5.name = 'mse5' mae5.name = 'mae5' theta_mu1_in = theta_mu1_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig1_in = theta_sig1_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff1_in = coeff1_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_mu2_in = theta_mu2_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig2_in = theta_sig2_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff2_in = coeff2_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_mu3_in = theta_mu3_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig3_in = theta_sig3_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff3_in = coeff3_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_mu4_in = theta_mu4_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig4_in = theta_sig4_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff4_in = coeff4_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_mu5_in = theta_mu5_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig5_in = theta_sig5_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff5_in = coeff5_temp.reshape((x_shape[0] * x_shape[1], -1)) #x_in = x.reshape((x_shape[0]*x_shape[1], -1)) y_in = y.reshape((y_shape[0] * y_shape[1], -1)) recon = GMMdisagMulti(y_dim, y_in, theta_mu1_in, theta_sig1_in, coeff1_in, theta_mu2_in, theta_sig2_in, coeff2_in, theta_mu3_in, theta_sig3_in, coeff3_in, theta_mu4_in, theta_sig4_in, coeff4_in, theta_mu5_in, theta_sig5_in, coeff5_in) recon = recon.reshape((x_shape[0], x_shape[1])) recon.name = 'gmm_out' ''' recon5 = GMM(y_in[:,4, None], theta_mu5_in, theta_sig5_in, coeff5_in) recon5 = recon.reshape((x_shape[0], x_shape[1])) ''' recon_term = recon.sum(axis=0).mean() recon_term.name = 'recon_term' kl_term = kl_temp.sum(axis=0).mean() kl_term.name = 'kl_term' nll_upper_bound = recon_term + kl_term nll_upper_bound.name = 'nll_upper_bound' ######################## TEST (GENERATION) TIME #s_temp_val = concatenate([s_0[None, :, :], s_temp_val[:-1]], axis=0)# shift so that position t holds s_{t-1}: s_0 is prepended and the last state dropped theta_mu1_temp_val.name = 'theta_mu1_val' theta_sig1_temp_val.name = 'theta_sig1_val' coeff1_temp_val.name = 'coeff1_val' y_pred1_temp_val.name = 'disaggregation1_val' #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1) mse1_val = T.mean((y_pred1_temp_val - y[:, :, 0].reshape( (y.shape[0], y.shape[1], 1)))**2) # axis=None: mean over all elements mae1_val = T.mean( T.abs_(y_pred1_temp_val - y[:, :, 0].reshape((y.shape[0], y.shape[1], 1)))) #NEURALNILM #(sum_output - sum_target) / max(sum_output, sum_target)) totPred = T.sum(y_pred1_temp_val) totReal = T.sum(y[:, :, 0]) relErr1_val = (totPred - totReal) / T.maximum(totPred, totReal) propAssigned1_val = 1 - T.sum( T.abs_(y_pred1_temp_val - y[:, :, 0].reshape( (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x)) #y_unNormalize = (y[:,:,0] * reader.stdTraining[0]) + reader.meanTraining[0] #y_pred1_temp_val = (y_pred1_temp_val * reader.stdTraining[0]) + reader.meanTraining[0] #mse1_valUnNorm = T.mean((y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) #mae1_valUnNorm = T.mean( T.abs_(y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))) mse1_val.name = 'mse1_val' mae1_val.name = 'mae1_val' theta_mu1_in_val = theta_mu1_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) theta_sig1_in_val = theta_sig1_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) coeff1_in_val = coeff1_temp_val.reshape((x_shape[0] * x_shape[1], -1)) theta_mu2_temp_val.name = 'theta_mu2_val' theta_sig2_temp_val.name = 'theta_sig2_val' coeff2_temp_val.name = 'coeff2_val' y_pred2_temp_val.name = 'disaggregation2_val' mse2_val = T.mean((y_pred2_temp_val - y[:, :, 1].reshape( (y.shape[0], y.shape[1], 1)))**2) # axis=None: mean over all elements mae2_val = T.mean( T.abs_(y_pred2_temp_val - y[:, :, 1].reshape((y.shape[0], y.shape[1], 1)))) totPred = T.sum(y_pred2_temp_val) totReal = T.sum(y[:, :, 1]) relErr2_val = (totPred - totReal) / T.maximum(totPred, totReal) propAssigned2_val = 1 - T.sum( T.abs_(y_pred2_temp_val - y[:, :, 1].reshape( (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x)) mse2_val.name = 'mse2_val' mae2_val.name = 'mae2_val' theta_mu2_in_val = theta_mu2_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) theta_sig2_in_val = theta_sig2_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) coeff2_in_val = coeff2_temp_val.reshape((x_shape[0] * x_shape[1], -1)) theta_mu3_temp_val.name = 'theta_mu3_val' theta_sig3_temp_val.name = 'theta_sig3_val' coeff3_temp_val.name = 'coeff3_val' y_pred3_temp_val.name = 'disaggregation3_val' mse3_val = T.mean((y_pred3_temp_val - y[:, :, 2].reshape( (y.shape[0], y.shape[1], 1)))**2) # axis=None: mean over all elements mae3_val = T.mean( T.abs_(y_pred3_temp_val - y[:, :, 2].reshape((y.shape[0], y.shape[1], 1)))) totPred = T.sum(y_pred3_temp_val) totReal = T.sum(y[:, :, 2]) relErr3_val = (totPred - totReal) / T.maximum(totPred, totReal) propAssigned3_val = 1 - T.sum( T.abs_(y_pred3_temp_val - y[:, :, 2].reshape( (y.shape[0], y.shape[1], 1)))) /
(2 * T.sum(x)) mse3_val.name = 'mse3_val' mae3_val.name = 'mae3_val' theta_mu3_in_val = theta_mu3_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) theta_sig3_in_val = theta_sig3_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) coeff3_in_val = coeff3_temp_val.reshape((x_shape[0] * x_shape[1], -1)) theta_mu4_temp_val.name = 'theta_mu4_val' theta_sig4_temp_val.name = 'theta_sig4_val' coeff4_temp_val.name = 'coeff4_val' y_pred4_temp_val.name = 'disaggregation4_val' mse4_val = T.mean((y_pred4_temp_val - y[:, :, 3].reshape( (y.shape[0], y.shape[1], 1)))**2) # axis=None: mean over all elements mae4_val = T.mean( T.abs_(y_pred4_temp_val - y[:, :, 3].reshape((y.shape[0], y.shape[1], 1)))) totPred = T.sum(y_pred4_temp_val) totReal = T.sum(y[:, :, 3]) relErr4_val = (totPred - totReal) / T.maximum(totPred, totReal) propAssigned4_val = 1 - T.sum( T.abs_(y_pred4_temp_val - y[:, :, 3].reshape( (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x)) mse4_val.name = 'mse4_val' mae4_val.name = 'mae4_val' theta_mu4_in_val = theta_mu4_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) theta_sig4_in_val = theta_sig4_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) coeff4_in_val = coeff4_temp_val.reshape((x_shape[0] * x_shape[1], -1)) theta_mu5_temp_val.name = 'theta_mu5_val' theta_sig5_temp_val.name = 'theta_sig5_val' coeff5_temp_val.name = 'coeff5_val' y_pred5_temp_val.name = 'disaggregation5_val' mse5_val = T.mean((y_pred5_temp_val - y[:, :, 4].reshape( (y.shape[0], y.shape[1], 1)))**2) # axis=None: mean over all elements mae5_val = T.mean( T.abs_(y_pred5_temp_val - y[:, :, 4].reshape((y.shape[0], y.shape[1], 1)))) totPred = T.sum(y_pred5_temp_val) totReal = T.sum(y[:, :, 4]) relErr5_val = (totPred - totReal) / T.maximum(totPred, totReal) propAssigned5_val = 1 - T.sum( T.abs_(y_pred5_temp_val - y[:, :, 4].reshape( (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x)) mse5_val.name = 'mse5_val' mae5_val.name = 'mae5_val' theta_mu5_in_val = theta_mu5_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) theta_sig5_in_val = theta_sig5_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) coeff5_in_val = coeff5_temp_val.reshape((x_shape[0] * x_shape[1], -1)) prediction_val = T.concatenate([ y_pred1_temp_val, y_pred2_temp_val, y_pred3_temp_val, y_pred4_temp_val, y_pred5_temp_val ], axis=2) recon_val = GMMdisagMulti( y_dim, y_in, theta_mu1_in_val, theta_sig1_in_val, coeff1_in_val, theta_mu2_in_val, theta_sig2_in_val, coeff2_in_val, theta_mu3_in_val, theta_sig3_in_val, coeff3_in_val, theta_mu4_in_val, theta_sig4_in_val, coeff4_in_val, theta_mu5_in_val, theta_sig5_in_val, coeff5_in_val) recon_val = recon_val.reshape((x_shape[0], x_shape[1])) recon_val.name = 'gmm_out_val' totaMSE_val = (mse1_val + mse2_val + mse3_val + mse4_val + mse5_val) / y_dim totaMAE_val = (mae1_val + mae2_val + mae3_val + mae4_val + mae5_val) / y_dim recon_term_val = recon_val.sum(axis=0).mean() recon_term_val.name = 'recon_term_val' ###################### model.inputs = [x, mask, y, y_mask, scheduleSamplingMask] model.params = params model.nodes = nodes optimizer = Adam(lr=lr) header = "epoch,log,kl,nll_upper_bound,mse,mae\n" extension = [ GradientClipping(batch_size=batch_size), EpochCount(epoch, save_path, header), Monitoring( freq=monitoring_freq, ddout=[ nll_upper_bound, recon_term, kl_term, mse1, mae1, mse2, mae2, mse3, mae3, mse4, mae4, mse5, mae5, y_pred1_temp, y_pred2_temp, y_pred3_temp,
y_pred4_temp, y_pred5_temp ], indexSep=13, indexDDoutPlot=[13], # adding indexes of ddout for the plotting #, (6,y_pred_temp) instancesPlot=instancesPlot, #0-150 data=[Iterator(valid_data, batch_size)], savedFolder=save_path), Picklize(freq=monitoring_freq, path=save_path), EarlyStopping(freq=monitoring_freq, path=save_path, channel=channel_name), WeightNorm() ] lr_iterations = {0: lr} mainloop = Training(name=pkl_name, data=Iterator(train_data, batch_size), model=model, optimizer=optimizer, cost=nll_upper_bound, outputs=[nll_upper_bound], n_steps=n_steps, extension=extension, lr_iterations=lr_iterations, k_speedOfconvergence=30) mainloop.run() data = Iterator(test_data, batch_size) test_fn = theano.function( inputs=[x, y], #[x, y], #givens={x:Xtest}, #on_unused_input='ignore', #z=( ,200,1) allow_input_downcast=True, outputs=[ prediction_val, recon_term_val, totaMSE_val, totaMAE_val, mse1_val, mse2_val, mse3_val, mse4_val, mse5_val, mae1_val, mae2_val, mae3_val, mae4_val, mae5_val, relErr1_val, relErr2_val, relErr3_val, relErr4_val, relErr5_val, propAssigned1_val, propAssigned2_val, propAssigned3_val, propAssigned4_val, propAssigned5_val ] #prediction_val, mse_val, mae_val , updates= updates_val #, allow_input_downcast=True, on_unused_input='ignore' ) testOutput = [] testMetrics2 = [] numBatchTest = 0 for batch in data: outputGeneration = test_fn(batch[0], batch[2]) testOutput.append(outputGeneration[1:14]) testMetrics2.append(outputGeneration[14:]) #{0:[4,20], 2:[5,10]} #if (numBatchTest==0): plt.figure(1) plt.plot(np.transpose(outputGeneration[0], [1, 0, 2])[4]) #ORIGINAL 1,0,2 plt.savefig(save_path + "/vrnn_dis_generated{}_Pred_0-4".format(numBatchTest)) plt.clf() plt.figure(2) plt.plot(np.transpose(batch[2], [1, 0, 2])[4]) plt.savefig(save_path + "/vrnn_dis_generated{}_RealDisag_0-4".format(numBatchTest)) plt.clf() plt.figure(3) plt.plot(np.transpose(batch[0], [1, 0, 2])[4]) #ORIGINAL 1,0,2 plt.savefig(save_path + "/vrnn_dis_generated{}_Realagg_0-4".format(numBatchTest)) plt.clf() numBatchTest += 1 testOutput = np.asarray(testOutput) testMetrics2 = np.asarray(testMetrics2) print(testOutput.shape) print(testMetrics2.shape) recon_test = testOutput[:, 0].mean() mse_test = testOutput[:, 1].mean() mae_test = testOutput[:, 2].mean() mse1_test = testOutput[:, 3].mean() mae1_test = testOutput[:, 8].mean() mse2_test = testOutput[:, 4].mean() mae2_test = testOutput[:, 9].mean() mse3_test = testOutput[:, 5].mean() mae3_test = testOutput[:, 10].mean() mse4_test = testOutput[:, 6].mean() mae4_test = testOutput[:, 11].mean() mse5_test = testOutput[:, 7].mean() mae5_test = testOutput[:, 12].mean() relErr1_test = testMetrics2[:, 0].mean() relErr2_test = testMetrics2[:, 1].mean() relErr3_test = testMetrics2[:, 2].mean() relErr4_test = testMetrics2[:, 3].mean() relErr5_test = testMetrics2[:, 4].mean() propAssigned1_test = testMetrics2[:, 5].mean() propAssigned2_test = testMetrics2[:, 6].mean() propAssigned3_test = testMetrics2[:, 7].mean() propAssigned4_test = testMetrics2[:, 8].mean() propAssigned5_test = testMetrics2[:, 9].mean() fLog = open(save_path + '/output.csv', 'w') fLog.write(str(lr_iterations) + "\n") fLog.write(str(appliances) + "\n") fLog.write(str(windows) + "\n") fLog.write( "logTest,mse1_test,mse2_test,mse3_test,mse4_test,mse5_test,mae1_test,mae2_test,mae3_test,mae4_test,mae5_test,mseTest,maeTest\n" ) fLog.write("{},{},{},{},{},{},{},{},{},{},{},{},{}\n\n".format( recon_test, mse1_test, mse2_test, mse3_test, mse4_test, mse5_test, mae1_test, mae2_test, mae3_test, mae4_test, mae5_test, 
mse_test, mae_test)) fLog.write( "relErr1,relErr2,relErr3,relErr4,relErr5,propAssigned1,propAssigned2,propAssigned3,propAssigned4,propAssigned5\n" ) fLog.write("{},{},{},{},{},{},{},{},{},{}\n".format( relErr1_test, relErr2_test, relErr3_test, relErr4_test, relErr5_test, propAssigned1_test, propAssigned2_test, propAssigned3_test, propAssigned4_test, propAssigned5_test)) fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n") fLog.write("{},{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim, y2s_dim, z2s_dim)) fLog.write( "epoch,log,kl,mse1,mse2,mse3,mse4,mse5,mae1,mae2,mae3,mae4,mae5\n") for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']): d, e, f, g, j, k, l, m = 0, 0, 0, 0, 0, 0, 0, 0 ep = mainloop.trainlog.monitor['epoch'][i] a = mainloop.trainlog.monitor['recon_term'][i] b = mainloop.trainlog.monitor['kl_term'][i] c = mainloop.trainlog.monitor['mse1'][i] h = mainloop.trainlog.monitor['mae1'][i] d = mainloop.trainlog.monitor['mse2'][i] j = mainloop.trainlog.monitor['mae2'][i] e = mainloop.trainlog.monitor['mse3'][i] k = mainloop.trainlog.monitor['mae3'][i] f = mainloop.trainlog.monitor['mse4'][i] l = mainloop.trainlog.monitor['mae4'][i] g = mainloop.trainlog.monitor['mse5'][i] m = mainloop.trainlog.monitor['mae5'][i] fLog.write( "{:d},{:.2f},{:.2f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n" .format(ep, a, b, c, d, e, f, g, h, j, k, l, m)) f = open(save_path + '/outputRealGeneration.pkl', 'wb') pickle.dump(outputGeneration, f, -1) f.close()
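######################## SKETCH (illustrative, not part of the original pipeline)
# A NumPy restatement of the two NeuralNILM-style test metrics built
# symbolically above (relErrN_val and propAssignedN_val), for one appliance
# channel. A sketch under the assumption that `pred` and `target` share one
# shape and `aggregate` is the mains signal the model conditions on.
import numpy as np

def nilm_metrics(pred, target, aggregate):
    tot_pred, tot_real = float(pred.sum()), float(target.sum())
    # relative error in total energy: (sum_output - sum_target) / max(...)
    rel_err = (tot_pred - tot_real) / max(tot_pred, tot_real)
    # proportion of total energy correctly assigned
    prop_assigned = 1.0 - np.abs(pred - target).sum() / (2.0 * aggregate.sum())
    return rel_err, prop_assigned
########################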
def main(args): theano.config.optimizer = 'fast_compile' #theano.config.exception_verbosity='high' trial = int(args['trial']) pkl_name = 'vrnn_gmm_%d' % trial channel_name = 'valid_nll_upper_bound' data_path = args['data_path'] save_path = args['save_path'] #+'/aggVSdisag_distrib/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M") period = int(args['period']) n_steps = int(args['n_steps']) stride_train = int(args['stride_train']) stride_test = int(args['stride_test']) monitoring_freq = int(args['monitoring_freq']) epoch = int(args['epoch']) batch_size = int(args['batch_size']) x_dim = int(args['x_dim']) y_dim = int(args['y_dim']) z_dim = int(args['z_dim']) rnn_dim = int(args['rnn_dim']) k = int(args['num_k']) #a mixture of K Gaussian functions lr = float(args['lr']) debug = int(args['debug']) print "trial no. %d" % trial print "batch size %d" % batch_size print "learning rate %f" % lr print "saving pkl file '%s'" % pkl_name print "to the save path '%s'" % save_path q_z_dim = 12 #150 p_z_dim = 12 #150 p_x_dim = 20 #250 x2s_dim = 15 #250 z2s_dim = 20 #150 target_dim = k # As the appliances are separated into theta_mu1, theta_mu2, etc., each output is modelled by its own mixture of k Gaussians model = Model() train_data = UKdale( name='train', prep='normalize', cond=True, # False path=data_path, period=period, n_steps=n_steps, x_dim=x_dim, stride_train=stride_train, stride_test=stride_test) X_mean = train_data.X_mean X_std = train_data.X_std valid_data = UKdale( name='valid', prep='normalize', cond=True, # False path=data_path, X_mean=X_mean, X_std=X_std, period=period, n_steps=n_steps, x_dim=x_dim, stride_train=stride_train, stride_test=stride_test) init_W = InitCell('rand') init_U = InitCell('ortho') init_b = InitCell('zeros') init_b_sig = InitCell('const', mean=0.6) x, mask, y, y_mask = train_data.theano_vars() x.name = 'x_original' if debug: x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32) temp = np.ones((15, batch_size), dtype=np.float32) temp[:, -2:] = 0.
mask.tag.test_value = temp x_1 = FullyConnectedLayer(name='x_1', parent=['x_t'], parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b) z_1 = FullyConnectedLayer(name='z_1', parent=['z_t'], parent_dim=[z_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b) rnn = LSTM(name='rnn', parent=['x_1', 'z_1'], parent_dim=[x2s_dim, z2s_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b) ''' dissag_pred = FullyConnectedLayer(name='disag_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=num_apps, unit='relu', init_W=init_W, init_b=init_b) ''' phi_1 = FullyConnectedLayer(name='phi_1', parent=['x_1', 's_tm1'], parent_dim=[x2s_dim, rnn_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b) phi_mu = FullyConnectedLayer(name='phi_mu', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) phi_sig = FullyConnectedLayer(name='phi_sig', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) prior_1 = FullyConnectedLayer(name='prior_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b) prior_mu = FullyConnectedLayer(name='prior_mu', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) prior_sig = FullyConnectedLayer(name='prior_sig', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_1 = FullyConnectedLayer(name='theta_1', parent=['z_1', 's_tm1'], parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b) theta_mu = [] theta_sig = [] coeff = [] for i in range(y_dim): theta_mu.append( FullyConnectedLayer(name='theta_mu' + str(i), parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b)) theta_sig.append( FullyConnectedLayer(name='theta_sig' + str(i), parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig)) coeff.append( FullyConnectedLayer(name='coeff' + str(i), parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b)) ''' theta_mu1 = FullyConnectedLayer(name='theta_mu1', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_mu2 = FullyConnectedLayer(name='theta_mu2', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_mu3 = FullyConnectedLayer(name='theta_mu3', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_sig1 = FullyConnectedLayer(name='theta_sig1', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_sig2 = FullyConnectedLayer(name='theta_sig2', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_sig3 = FullyConnectedLayer(name='theta_sig3', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) coeff1 = FullyConnectedLayer(name='coeff1', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) coeff2 = FullyConnectedLayer(name='coeff2', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) coeff3 = FullyConnectedLayer(name='coeff3', 
parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) ''' corr = FullyConnectedLayer(name='corr', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='tanh', init_W=init_W, init_b=init_b) binary = FullyConnectedLayer(name='binary', parent=['theta_1'], parent_dim=[p_x_dim], nout=1, unit='sigmoid', init_W=init_W, init_b=init_b) nodes = [ rnn, x_1, z_1, #dissag_pred, phi_1, phi_mu, phi_sig, prior_1, prior_mu, prior_sig, theta_1 ] #theta_mu1, theta_mu2, theta_mu3, theta_sig1, theta_sig2, theta_sig3, coeff1, coeff2 ,coeff3] for i in range(y_dim): nodes.append(theta_mu[i]) #, corr, binary nodes.append(theta_sig[i]) nodes.append(coeff[i]) params = OrderedDict() for node in nodes: if node.initialize() is not None: params.update(node.initialize()) params = init_tparams(params) s_0 = rnn.get_init_state(batch_size) x_1_temp = x_1.fprop([x], params) def inner_fn(x_t, s_tm1): phi_1_t = phi_1.fprop([x_t, s_tm1], params) phi_mu_t = phi_mu.fprop([phi_1_t], params) phi_sig_t = phi_sig.fprop([phi_1_t], params) prior_1_t = prior_1.fprop([s_tm1], params) prior_mu_t = prior_mu.fprop([prior_1_t], params) prior_sig_t = prior_sig.fprop([prior_1_t], params) z_t = Gaussian_sample( phi_mu_t, phi_sig_t ) #in the original code it is gaussian. GMM is for the generation z_1_t = z_1.fprop([z_t], params) theta_1_t = theta_1.fprop([z_1_t, s_tm1], params) #theta_mu_t= TL.TypedListType(T.dtensor3)() theta_mu_t = TL.TypedListType(T.TensorType('float64', (False, ) * 2))() #heta_mu_t = []#T.ftensor3('theta_mu_t') #theta_mu_t = [theta_mu_y.fprop([theta_1_t], params) for theta_mu_y in theta_mu] for theta_mu_y in theta_mu: theta_mu_t.append(theta_mu_y.fprop([theta_1_t], params)) theta_sig_t = TL.TypedListType(T.TensorType('float64', (False, ) * 2))() for theta_sig_y in theta_sig: theta_sig_t.append(theta_sig_y.fprop([theta_1_t], params)) coeff_t = TL.TypedListType(T.TensorType('float64', (False, ) * 2))() for theta_coef_y in coeff: coeff_t.append(theta_coef_y.fprop([theta_1_t], params)) ''' theta_sig_t = [theta_sig_y.fprop([theta_1_t], params) for theta_sig_y in theta_sig] coeff_t = [theta_coef_y.fprop([theta_1_t], params) for theta_coef_y in coeff] ''' ''' theta_mu1_t = theta_mu1.fprop([theta_1_t], params) theta_sig1_t = theta_sig1.fprop([theta_1_t], params) coeff1_t = coeff1.fprop([theta_1_t], params) theta_mu2_t = theta_mu2.fprop([theta_1_t], params) theta_sig2_t = theta_sig2.fprop([theta_1_t], params) coeff2_t = coeff2.fprop([theta_1_t], params) theta_mu3_t = theta_mu3.fprop([theta_1_t], params) theta_sig3_t = theta_sig3.fprop([theta_1_t], params) coeff3_t = coeff3.fprop([theta_1_t], params) ''' #corr_t = corr.fprop([theta_1_t], params) #binary_t = binary.fprop([theta_1_t], params) # I was missing this reshape that is done before BiGMM in the original code ''' theta_mu_in = theta_mu_t.reshape((x_t[0]*x_t[1], -1)) theta_sig_in = theta_sig_t.reshape((x_t[0]*x_t[1], -1)) coeff_in = coeff_t.reshape((x_t[0]*x_t[1], -1)) y_pred1 = GMM_sampleY(theta_mu1_t, theta_sig1_t, coeff1_t) #Gaussian_sample(theta_mu_t, theta_sig_t) y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t) y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t) ''' #y_pred = [GMM_sampleY(theta_mu_t[i], theta_sig_t[i], coeff_t[i]) for i in range(y_dim)]#T.stack([y_pred1,y_pred2],axis = 0 ) s_t = rnn.fprop([[x_t, z_1_t], [s_tm1]], params) #y_pred = dissag_pred.fprop([s_t], params) #return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_t, z_1_t, theta_1_t, theta_mu_t[0], theta_sig_t[0], coeff_t[0], 
theta_mu_t[1], theta_sig_t[1], coeff_t[1], theta_mu_t[2], theta_sig_t[2], coeff_t[2],y_pred1, y_pred2, y_pred3 return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_t, z_1_t, theta_mu_t, theta_sig_t, coeff_t #,y_pred #corr_temp, binary_temp # ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,z_t_temp, z_1_temp, theta_1_temp, theta_mu_temp, theta_sig_temp, coeff_temp, y_pred_temp), updates) =\ [s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,z_t_temp, z_1_temp, theta_mu_temp, theta_sig_temp, coeff_temp], updates =\ theano.scan(fn=inner_fn, sequences=[x_1_temp], outputs_info=[s_0, None, None, None, None, None, None, None, None,None]) for k, v in updates.iteritems(): k.default_update = v s_temp = concatenate( [s_0[None, :, :], s_temp[:-1]], axis=0 ) # seems like this is for creating an additional dimension to s_0 ''' theta_1_temp = theta_1.fprop([z_1_temp, s_temp], params) theta_mu_temp = theta_mu.fprop([theta_1_temp], params) theta_sig_temp = theta_sig.fprop([theta_1_temp], params) coeff_temp = coeff.fprop([theta_1_temp], params) corr_temp = corr.fprop([theta_1_temp], params) binary_temp = binary.fprop([theta_1_temp], params) ''' s_temp.name = 'h_1' #gisse z_1_temp.name = 'z_1' #gisse z_t_temp.name = 'z' theta_mu_temp.name = 'theta_mu' theta_sig_temp.name = 'theta_sig' coeff_temp.name = 'coeff' #y_pred_temp.name = 'disaggregation' ''' theta_mu1_temp.name = 'theta_mu1' theta_sig1_temp.name = 'theta_sig1' coeff1_temp.name = 'coeff1' theta_mu2_temp.name = 'theta_mu2' theta_sig2_temp.name = 'theta_sig2' coeff2_temp.name = 'coeff2' #corr_temp.name = 'corr' #binary_temp.name = 'binary' #x_pred_temp.name = 'x_reconstructed' y_pred1_temp.name = 'disaggregation1' y_pred2_temp.name = 'disaggregation2' y_pred3_temp.name = 'disaggregation3' ''' #mse = T.mean((y_pred_temp - y)**2) # cause mse can be 26000 #mse.name = 'mse' kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp) x_shape = x.shape y_shape = y.shape x_in = x.reshape((x_shape[0] * x_shape[1], -1)) y_in = y.reshape((y_shape[0] * y_shape[1], -1)) theta_mu_in = [ theta_mu_temp[i].reshape((y_shape[0] * y_shape[1], -1)) for i in range(y_dim) ] theta_sig_in = [ theta_sig_temp[i].reshape((y_shape[0] * y_shape[1], -1)) for i in range(y_dim) ] coeff_in = [ coeff_temp[i].reshape((y_shape[0] * y_shape[1], -1)) for i in range(y_dim) ] ''' theta_mu1_in = theta_mu1_temp.reshape((y_shape[0]*y_shape[1], -1)) theta_sig1_in = theta_sig1_temp.reshape((y_shape[0]*y_shape[1], -1)) coeff1_in = coeff1_temp.reshape((y_shape[0]*y_shape[1], -1)) theta_mu2_in = theta_mu2_temp.reshape((y_shape[0]*y_shape[1], -1)) theta_sig2_in = theta_sig2_temp.reshape((y_shape[0]*y_shape[1], -1)) coeff2_in = coeff2_temp.reshape((y_shape[0]*y_shape[1], -1)) theta_mu3_in = theta_mu3_temp.reshape((y_shape[0]*y_shape[1], -1)) theta_sig3_in = theta_sig3_temp.reshape((y_shape[0]*y_shape[1], -1)) coeff3_in = coeff3_temp.reshape((y_shape[0]*y_shape[1], -1)) ''' #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1)) #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1)) recon = GMMdisagMulti( y_in, y_dim, theta_mu_in, theta_sig_in, coeff_in ) # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in) recon = recon.reshape((x_shape[0], x_shape[1])) recon.name = 'gmm_out' #recon = recon * mask recon_term = recon.sum(axis=0).mean() recon_term.name = 'recon_term' #kl_temp = kl_temp * mask kl_term = kl_temp.sum(axis=0).mean() kl_term.name = 'kl_term' #nll_upper_bound_0 = recon_term + kl_term 
#nll_upper_bound_0.name = 'nll_upper_bound_0' nll_upper_bound = recon_term + kl_term #+ mse nll_upper_bound.name = 'nll_upper_bound' ''' max_x = x.max() mean_x = x.mean() min_x = x.min() max_x.name = 'max_x' mean_x.name = 'mean_x' min_x.name = 'min_x' max_theta_mu = theta_mu_in.max() mean_theta_mu = theta_mu_in.mean() min_theta_mu = theta_mu_in.min() max_theta_mu.name = 'max_theta_mu' mean_theta_mu.name = 'mean_theta_mu' min_theta_mu.name = 'min_theta_mu' max_theta_sig = theta_sig_in.max() mean_theta_sig = theta_sig_in.mean() min_theta_sig = theta_sig_in.min() max_theta_sig.name = 'max_theta_sig' mean_theta_sig.name = 'mean_theta_sig' min_theta_sig.name = 'min_theta_sig' coeff_max = coeff_in.max() coeff_min = coeff_in.min() coeff_mean_max = coeff_in.mean(axis=0).max() coeff_mean_min = coeff_in.mean(axis=0).min() coeff_max.name = 'coeff_max' coeff_min.name = 'coeff_min' coeff_mean_max.name = 'coeff_mean_max' coeff_mean_min.name = 'coeff_mean_min' max_phi_sig = phi_sig_temp.max() mean_phi_sig = phi_sig_temp.mean() min_phi_sig = phi_sig_temp.min() max_phi_sig.name = 'max_phi_sig' mean_phi_sig.name = 'mean_phi_sig' min_phi_sig.name = 'min_phi_sig' max_prior_sig = prior_sig_temp.max() mean_prior_sig = prior_sig_temp.mean() min_prior_sig = prior_sig_temp.min() max_prior_sig.name = 'max_prior_sig' mean_prior_sig.name = 'mean_prior_sig' min_prior_sig.name = 'min_prior_sig' ''' model.inputs = [x, mask, y, y_mask] model.params = params model.nodes = nodes optimizer = Adam(lr=lr) extension = [ GradientClipping(batch_size=batch_size), EpochCount(epoch), Monitoring( freq=monitoring_freq, ddout=[ nll_upper_bound, recon_term, kl_term, #2 theta_mu_temp, theta_sig_temp, z_t_temp, #y_pred_temp, coeff_temp, s_temp, z_1_temp ], indexSep=1, # 0 for previous function, 1 for personalized plotting indexDDoutPlot=[(3, theta_mu_temp), (5, z_t_temp) ], # adding indexes of ddout for the plotting #, (6,y_pred_temp) instancesPlot=[20, 100], #0-150 data=[Iterator(valid_data, batch_size)], savedFolder=save_path), Picklize(freq=monitoring_freq, path=save_path), EarlyStopping(freq=monitoring_freq, path=save_path, channel=channel_name), WeightNorm() ] mainloop = Training(name=pkl_name, data=Iterator(train_data, batch_size), model=model, optimizer=optimizer, cost=nll_upper_bound, outputs=[nll_upper_bound], extension=extension) mainloop.run() fLog = open(save_path + '/output.csv', 'w') print('Printing') print(len(mainloop.trainlog.monitor['nll_upper_bound'])) fLog.write("log,kl,nll_upper_bound\n") for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']): a = mainloop.trainlog.monitor['recon_term'][i] b = mainloop.trainlog.monitor['kl_term'][i] #c = mainloop.trainlog.monitor['mse'][i] d = mainloop.trainlog.monitor['nll_upper_bound'][i] #print(a,b) fLog.write("{},{},{}\n".format(a, b, d)) fLog.close()
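######################## SKETCH (illustrative, not part of the original pipeline)
# GMMdisagMulti is imported from the cost library and not shown in this file.
# As a sketch of the quantity it presumably reduces to per appliance: the
# negative log-likelihood of a univariate K-component Gaussian mixture,
# evaluated with a numerically stable log-sum-exp. Names and shapes here are
# assumptions for illustration only.
import numpy as np

def gmm_negative_log_likelihood(y, mu, sig, coeff):
    # y: (N, 1) targets; mu, sig, coeff: (N, K) per-sample mixture parameters
    log_comp = (np.log(coeff) - 0.5 * np.log(2.0 * np.pi)
                - np.log(sig) - 0.5 * ((y - mu) / sig) ** 2)
    m = log_comp.max(axis=1, keepdims=True)  # log-sum-exp shift for stability
    return -(m[:, 0] + np.log(np.exp(log_comp - m).sum(axis=1)))
########################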
def main(args): theano.config.optimizer = 'fast_compile' #theano.config.exception_verbosity='high' trial = int(args['trial']) pkl_name = 'vrnn_gmm_%d' % trial channel_name = 'valid_nll_upper_bound' data_path = args['data_path'] save_path = args['save_path'] #+'/aggVSdisag_distribEach/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M") period = int(args['period']) n_steps = int(args['n_steps']) stride_train = int(args['stride_train']) stride_test = int(args['stride_test']) # monitoring_freq: how many training batches pass between evaluations on the validation set monitoring_freq = int(args['monitoring_freq']) epoch = int(args['epoch']) batch_size = int(args['batch_size']) x_dim = int(args['x_dim']) y_dim = int(args['y_dim']) z_dim = int(args['z_dim']) rnn_dim = int(args['rnn_dim']) k = int(args['num_k']) #a mixture of K Gaussian functions lr = float(args['lr']) debug = int(args['debug']) print "trial no. %d" % trial print "batch size %d" % batch_size print "learning rate %f" % lr print "saving pkl file '%s'" % pkl_name print "to the save path '%s'" % save_path q_z_dim = 8 #150 p_z_dim = 8 #150 p_x_dim = 7 #250 x2s_dim = 7 #250 z2s_dim = 8 #150 target_dim = y_dim * k #(x_dim-1)*k model = Model() train_data = UKdale( name='train', prep='normalize', cond=True, # False path=data_path, period=period, n_steps=n_steps, x_dim=x_dim, stride_train=stride_train, stride_test=stride_test) X_mean = train_data.X_mean X_std = train_data.X_std valid_data = UKdale( name='valid', prep='normalize', cond=True, # False path=data_path, X_mean=X_mean, X_std=X_std, period=period, n_steps=n_steps, x_dim=x_dim, stride_train=stride_train, stride_test=stride_test) init_W = InitCell('rand') init_U = InitCell('ortho') init_b = InitCell('zeros') init_b_sig = InitCell('const', mean=0.6) x, mask, y, y_mask = train_data.theano_vars() x.name = 'x_original' if debug: x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32) temp = np.ones((15, batch_size), dtype=np.float32) temp[:, -2:] = 0.
mask.tag.test_value = temp x_1 = FullyConnectedLayer(name='x_1', parent=['x_t'], parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b) z_1 = FullyConnectedLayer(name='z_1', parent=['z_t'], parent_dim=[z_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b) rnn = LSTM(name='rnn', parent=['x_1', 'z_1'], parent_dim=[x2s_dim, z2s_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b) phi_1 = FullyConnectedLayer(name='phi_1', parent=['x_1', 's_tm1'], parent_dim=[x2s_dim, rnn_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b) phi_mu = FullyConnectedLayer(name='phi_mu', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) phi_sig = FullyConnectedLayer(name='phi_sig', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) prior_1 = FullyConnectedLayer(name='prior_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b) prior_mu = FullyConnectedLayer(name='prior_mu', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) prior_sig = FullyConnectedLayer(name='prior_sig', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_1 = FullyConnectedLayer(name='theta_1', parent=['z_1', 's_tm1'], parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b) theta_mu = FullyConnectedLayer(name='theta_mu', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_sig = FullyConnectedLayer(name='theta_sig', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) coeff = FullyConnectedLayer( name='coeff', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', #to ensure that the sum adds to one init_W=init_W, init_b=init_b) corr = FullyConnectedLayer(name='corr', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='tanh', init_W=init_W, init_b=init_b) binary = FullyConnectedLayer(name='binary', parent=['theta_1'], parent_dim=[p_x_dim], nout=1, unit='sigmoid', init_W=init_W, init_b=init_b) nodes = [ rnn, x_1, z_1, #dissag_pred, phi_1, phi_mu, phi_sig, prior_1, prior_mu, prior_sig, theta_1, theta_mu, theta_sig, coeff ] #, corr, binary params = OrderedDict() for node in nodes: if node.initialize() is not None: params.update(node.initialize()) params = init_tparams(params) s_0 = rnn.get_init_state(batch_size) x_1_temp = x_1.fprop([x], params) def inner_fn(x_t, s_tm1): phi_1_t = phi_1.fprop([x_t, s_tm1], params) phi_mu_t = phi_mu.fprop([phi_1_t], params) phi_sig_t = phi_sig.fprop([phi_1_t], params) prior_1_t = prior_1.fprop([s_tm1], params) prior_mu_t = prior_mu.fprop([prior_1_t], params) prior_sig_t = prior_sig.fprop([prior_1_t], params) z_t = Gaussian_sample( phi_mu_t, phi_sig_t ) #in the original code it is gaussian. 
GMM is for the generation z_1_t = z_1.fprop([z_t], params) theta_1_t = theta_1.fprop([z_1_t, s_tm1], params) theta_mu_t = theta_mu.fprop([theta_1_t], params) theta_sig_t = theta_sig.fprop([theta_1_t], params) coeff_t = coeff.fprop([theta_1_t], params) #corr_t = corr.fprop([theta_1_t], params) #binary_t = binary.fprop([theta_1_t], params) # I was missing this reshape that is done before BiGMM in the original code ''' theta_mu_in = theta_mu_t.reshape((x_t[0]*x_t[1], -1)) theta_sig_in = theta_sig_t.reshape((x_t[0]*x_t[1], -1)) coeff_in = coeff_t.reshape((x_t[0]*x_t[1], -1)) ''' y_pred = GMM_sampleY( theta_mu_t, theta_sig_t, coeff_t) #Gaussian_sample(theta_mu_t, theta_sig_t) s_t = rnn.fprop([[x_t, z_1_t], [s_tm1]], params) #y_pred = dissag_pred.fprop([s_t], params) return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_t, z_1_t, theta_1_t, theta_mu_t, theta_sig_t, coeff_t, y_pred #corr_temp, binary_temp ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,z_t_temp, z_1_temp, theta_1_temp, theta_mu_temp, theta_sig_temp, coeff_temp, y_pred_temp), updates) =\ theano.scan(fn=inner_fn, sequences=[x_1_temp], outputs_info=[s_0, None, None, None, None, None, None, None, None, None, None, None]) for k, v in updates.iteritems(): k.default_update = v s_temp = concatenate( [s_0[None, :, :], s_temp[:-1]], axis=0 ) # seems like this is for creating an additional dimension to s_0 ''' theta_1_temp = theta_1.fprop([z_1_temp, s_temp], params) theta_mu_temp = theta_mu.fprop([theta_1_temp], params) theta_sig_temp = theta_sig.fprop([theta_1_temp], params) coeff_temp = coeff.fprop([theta_1_temp], params) corr_temp = corr.fprop([theta_1_temp], params) binary_temp = binary.fprop([theta_1_temp], params) ''' s_temp.name = 'h_1' #gisse z_1_temp.name = 'z_1' #gisse z_t_temp.name = 'z' theta_mu_temp.name = 'theta_mu' theta_sig_temp.name = 'theta_sig' coeff_temp.name = 'coeff' #corr_temp.name = 'corr' #binary_temp.name = 'binary' #x_pred_temp.name = 'x_reconstructed' y_pred_temp.name = 'disaggregation' #mse = T.mean((y_pred_temp - y)**2) # cause mse can be 26000 #mse.name = 'mse' kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp) x_shape = x.shape y_shape = y.shape x_in = x.reshape((x_shape[0] * x_shape[1], -1)) y_in = y.reshape((y_shape[0] * y_shape[1], -1)) theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff_in = coeff_temp.reshape((x_shape[0] * x_shape[1], -1)) #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1)) #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1)) #print("Printing shapes") #print (y_in.shape, theta_mu_in.shape, theta_sig_in.shape, coeff_in.shape) recon = GMM( y_in, theta_mu_in, theta_sig_in, coeff_in ) # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in) recon = recon.reshape((x_shape[0], x_shape[1])) recon.name = 'gmm_out' #recon = recon * mask recon_term = recon.sum(axis=0).mean() recon_term.name = 'recon_term' #kl_temp = kl_temp * mask kl_term = kl_temp.sum(axis=0).mean() kl_term.name = 'kl_term' #nll_upper_bound_0 = recon_term + kl_term #nll_upper_bound_0.name = 'nll_upper_bound_0' nll_upper_bound = recon_term + kl_term #+ mse nll_upper_bound.name = 'nll_upper_bound' max_x = x.max() mean_x = x.mean() min_x = x.min() max_x.name = 'max_x' mean_x.name = 'mean_x' min_x.name = 'min_x' max_theta_mu = theta_mu_in.max() mean_theta_mu = theta_mu_in.mean() min_theta_mu = theta_mu_in.min() max_theta_mu.name = 'max_theta_mu' 
mean_theta_mu.name = 'mean_theta_mu' min_theta_mu.name = 'min_theta_mu' max_theta_sig = theta_sig_in.max() mean_theta_sig = theta_sig_in.mean() min_theta_sig = theta_sig_in.min() max_theta_sig.name = 'max_theta_sig' mean_theta_sig.name = 'mean_theta_sig' min_theta_sig.name = 'min_theta_sig' coeff_max = coeff_in.max() coeff_min = coeff_in.min() coeff_mean_max = coeff_in.mean(axis=0).max() coeff_mean_min = coeff_in.mean(axis=0).min() coeff_max.name = 'coeff_max' coeff_min.name = 'coeff_min' coeff_mean_max.name = 'coeff_mean_max' coeff_mean_min.name = 'coeff_mean_min' max_phi_sig = phi_sig_temp.max() mean_phi_sig = phi_sig_temp.mean() min_phi_sig = phi_sig_temp.min() max_phi_sig.name = 'max_phi_sig' mean_phi_sig.name = 'mean_phi_sig' min_phi_sig.name = 'min_phi_sig' max_prior_sig = prior_sig_temp.max() mean_prior_sig = prior_sig_temp.mean() min_prior_sig = prior_sig_temp.min() max_prior_sig.name = 'max_prior_sig' mean_prior_sig.name = 'mean_prior_sig' min_prior_sig.name = 'min_prior_sig' model.inputs = [x, mask, y, y_mask] model.params = params model.nodes = nodes optimizer = Adam(lr=lr) extension = [ GradientClipping(batch_size=batch_size), EpochCount(epoch), Monitoring( freq=monitoring_freq, ddout=[ nll_upper_bound, recon_term, kl_term, #mse, max_phi_sig, mean_phi_sig, min_phi_sig, max_prior_sig, mean_prior_sig, min_prior_sig, max_theta_sig, mean_theta_sig, min_theta_sig, max_x, mean_x, min_x, max_theta_mu, mean_theta_mu, min_theta_mu, coeff_max, coeff_min, coeff_mean_max, coeff_mean_min, #23 theta_mu_temp, theta_sig_temp, z_t_temp, y_pred_temp, #corr_temp, binary_temp, coeff_temp, #22 s_temp, z_1_temp ], indexSep=22, indexDDoutPlot=[(0, theta_mu_temp), (2, z_t_temp), (3, y_pred_temp) ], # adding indexes of ddout for the plotting instancesPlot=[0, 150], data=[Iterator(valid_data, batch_size)], savedFolder=save_path), Picklize(freq=monitoring_freq, path=save_path), EarlyStopping(freq=monitoring_freq, path=save_path, channel=channel_name), WeightNorm() ] mainloop = Training(name=pkl_name, data=Iterator(train_data, batch_size), model=model, optimizer=optimizer, cost=nll_upper_bound, outputs=[nll_upper_bound], extension=extension) mainloop.run() fLog = open(save_path + '/output.csv', 'w') print('Printing') print(len(mainloop.trainlog.monitor['nll_upper_bound'])) fLog.write("log,kl,nll_upper_bound\n") for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']): a = mainloop.trainlog.monitor['recon_term'][i] b = mainloop.trainlog.monitor['kl_term'][i] #c = mainloop.trainlog.monitor['mse'][i] d = mainloop.trainlog.monitor['nll_upper_bound'][i] #print(a,b) fLog.write("{},{},{}\n".format(a, b, d)) fLog.close()
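######################## SKETCH (illustrative, not part of the original pipeline)
# GMM_sampleY is imported from elsewhere; judging by how it is used above, it
# presumably performs ancestral sampling from the predicted mixture: draw a
# component index from the softmax coefficients, then sample from the chosen
# Gaussian. A NumPy sketch under that assumption:
import numpy as np

def gmm_sample_y(mu, sig, coeff, rng=None):
    # mu, sig, coeff: (N, K); returns (N, 1) samples
    rng = rng if rng is not None else np.random.RandomState(0)
    n, n_components = mu.shape
    idx = np.array([rng.choice(n_components, p=coeff[i]) for i in range(n)])
    rows = np.arange(n)
    return (mu[rows, idx] + sig[rows, idx] * rng.randn(n))[:, None]
########################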
def main(args): trial = int(args['trial']) pkl_name = 'vrnn_gauss_%d' % trial channel_name = 'valid_nll_upper_bound' data_path = args['data_path'] save_path = args['save_path'] monitoring_freq = int(args['monitoring_freq']) force_saving_freq = int(args['force_saving_freq']) reset_freq = int(args['reset_freq']) epoch = int(args['epoch']) batch_size = int(args['batch_size']) m_batch_size = int(args['m_batch_size']) x_dim = int(args['x_dim']) z_dim = int(args['z_dim']) rnn_dim = int(args['rnn_dim']) lr = float(args['lr']) debug = int(args['debug']) print "trial no. %d" % trial print "batch size %d" % batch_size print "learning rate %f" % lr print "saving pkl file '%s'" % pkl_name print "to the save path '%s'" % save_path q_z_dim = 500 p_z_dim = 500 p_x_dim = 600 x2s_dim = 600 z2s_dim = 500 target_dim = x_dim file_name = 'blizzard_unseg_tbptt' normal_params = np.load(data_path + file_name + '_normal.npz') X_mean = normal_params['X_mean'] X_std = normal_params['X_std'] model = Model() train_data = Blizzard_tbptt(name='train', path=data_path, frame_size=x_dim, file_name=file_name, X_mean=X_mean, X_std=X_std) valid_data = Blizzard_tbptt(name='valid', path=data_path, frame_size=x_dim, file_name=file_name, X_mean=X_mean, X_std=X_std) x = train_data.theano_vars() m_x = valid_data.theano_vars() if debug: x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=theano.config.floatX) m_x.tag.test_value = np.zeros((15, m_batch_size, x_dim), dtype=theano.config.floatX) init_W = InitCell('rand') init_U = InitCell('ortho') init_b = InitCell('zeros') init_b_sig = InitCell('const', mean=0.6) x_1 = FullyConnectedLayer(name='x_1', parent=['x_t'], parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b) x_2 = FullyConnectedLayer(name='x_2', parent=['x_1'], parent_dim=[x2s_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b) x_3 = FullyConnectedLayer(name='x_3', parent=['x_2'], parent_dim=[x2s_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b) x_4 = FullyConnectedLayer(name='x_4', parent=['x_3'], parent_dim=[x2s_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b) z_1 = FullyConnectedLayer(name='z_1', parent=['z_t'], parent_dim=[z_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b) z_2 = FullyConnectedLayer(name='z_2', parent=['z_1'], parent_dim=[z2s_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b) z_3 = FullyConnectedLayer(name='z_3', parent=['z_2'], parent_dim=[z2s_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b) z_4 = FullyConnectedLayer(name='z_4', parent=['z_3'], parent_dim=[z2s_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b) rnn = LSTM(name='rnn', parent=['x_4', 'z_4'], parent_dim=[x2s_dim, z2s_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b) phi_1 = FullyConnectedLayer(name='phi_1', parent=['x_4', 's_tm1'], parent_dim=[x2s_dim, rnn_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b) phi_2 = FullyConnectedLayer(name='phi_2', parent=['phi_1'], parent_dim=[q_z_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b) phi_3 = FullyConnectedLayer(name='phi_3', parent=['phi_2'], parent_dim=[q_z_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b) phi_4 = FullyConnectedLayer(name='phi_4', parent=['phi_3'], parent_dim=[q_z_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b) phi_mu = FullyConnectedLayer(name='phi_mu', parent=['phi_4'], parent_dim=[q_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) phi_sig = 
FullyConnectedLayer(name='phi_sig', parent=['phi_4'], parent_dim=[q_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) prior_1 = FullyConnectedLayer(name='prior_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b) prior_2 = FullyConnectedLayer(name='prior_2', parent=['prior_1'], parent_dim=[p_z_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b) prior_3 = FullyConnectedLayer(name='prior_3', parent=['prior_2'], parent_dim=[p_z_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b) prior_4 = FullyConnectedLayer(name='prior_4', parent=['prior_3'], parent_dim=[p_z_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b) prior_mu = FullyConnectedLayer(name='prior_mu', parent=['prior_4'], parent_dim=[p_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) prior_sig = FullyConnectedLayer(name='prior_sig', parent=['prior_4'], parent_dim=[p_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_1 = FullyConnectedLayer(name='theta_1', parent=['z_4', 's_tm1'], parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b) theta_2 = FullyConnectedLayer(name='theta_2', parent=['theta_1'], parent_dim=[p_x_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b) theta_3 = FullyConnectedLayer(name='theta_3', parent=['theta_2'], parent_dim=[p_x_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b) theta_4 = FullyConnectedLayer(name='theta_4', parent=['theta_3'], parent_dim=[p_x_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b) theta_mu = FullyConnectedLayer(name='theta_mu', parent=['theta_4'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_sig = FullyConnectedLayer(name='theta_sig', parent=['theta_4'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) nodes = [ rnn, x_1, x_2, x_3, x_4, z_1, z_2, z_3, z_4, phi_1, phi_2, phi_3, phi_4, phi_mu, phi_sig, prior_1, prior_2, prior_3, prior_4, prior_mu, prior_sig, theta_1, theta_2, theta_3, theta_4, theta_mu, theta_sig ] params = OrderedDict() for node in nodes: if node.initialize() is not None: params.update(node.initialize()) params = init_tparams(params) step_count = sharedX(0, name='step_count') last_rnn = np.zeros((batch_size, rnn_dim * 2), dtype=theano.config.floatX) rnn_tm1 = sharedX(last_rnn, name='rnn_tm1') shared_updates = OrderedDict() shared_updates[step_count] = step_count + 1 # Resets / Initializes the cell-state or the memory-state of each LSTM to # zero. s_0 = T.switch(T.eq(T.mod(step_count, reset_freq), 0), rnn.get_init_state(batch_size), rnn_tm1) # Forward Propagate the input to get more complex features for # every time step. x_1_temp = x_1.fprop([x], params) x_2_temp = x_2.fprop([x_1_temp], params) x_3_temp = x_3.fprop([x_2_temp], params) x_4_temp = x_4.fprop([x_3_temp], params) def inner_fn(x_t, s_tm1): # Generate the mean and standard deviation of the # latent variables Z_t | X_t for every time-step of the LSTM. # This is a function of the input and the hidden state of the previous # time step. phi_1_t = phi_1.fprop([x_t, s_tm1], params) phi_2_t = phi_2.fprop([phi_1_t], params) phi_3_t = phi_3.fprop([phi_2_t], params) phi_4_t = phi_4.fprop([phi_3_t], params) phi_mu_t = phi_mu.fprop([phi_4_t], params) phi_sig_t = phi_sig.fprop([phi_4_t], params) # Prior on the latent variables at every time-step # Dependent only on the hidden-step. 
prior_1_t = prior_1.fprop([s_tm1], params) prior_2_t = prior_2.fprop([prior_1_t], params) prior_3_t = prior_3.fprop([prior_2_t], params) prior_4_t = prior_4.fprop([prior_3_t], params) prior_mu_t = prior_mu.fprop([prior_4_t], params) prior_sig_t = prior_sig.fprop([prior_4_t], params) # Sample from the latent distribution with mean phi_mu_t # and std phi_sig_t z_t = Gaussian_sample(phi_mu_t, phi_sig_t) # h_t = f(h_{t-1}, z_t, x_t) z_1_t = z_1.fprop([z_t], params) z_2_t = z_2.fprop([z_1_t], params) z_3_t = z_3.fprop([z_2_t], params) z_4_t = z_4.fprop([z_3_t], params) s_t = rnn.fprop([[x_t, z_4_t], [s_tm1]], params) return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_4_t # Iterate over every time-step ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp, z_4_temp), updates) =\ theano.scan(fn=inner_fn, sequences=[x_4_temp], outputs_info=[s_0, None, None, None, None, None]) for k, v in updates.iteritems(): k.default_update = v shared_updates[rnn_tm1] = s_temp[-1] s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0) # Generate the output distribution at every time-step. # This is a function of the latent variables and the hidden-state at # every time-step. theta_1_temp = theta_1.fprop([z_4_temp, s_temp], params) theta_2_temp = theta_2.fprop([theta_1_temp], params) theta_3_temp = theta_3.fprop([theta_2_temp], params) theta_4_temp = theta_4.fprop([theta_3_temp], params) theta_mu_temp = theta_mu.fprop([theta_4_temp], params) theta_sig_temp = theta_sig.fprop([theta_4_temp], params) kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp) recon = Gaussian(x, theta_mu_temp, theta_sig_temp) recon_term = recon.mean() kl_term = kl_temp.mean() nll_upper_bound = recon_term + kl_term nll_upper_bound.name = 'nll_upper_bound' # Forward-propagation of the validation data. m_x_1_temp = x_1.fprop([m_x], params) m_x_2_temp = x_2.fprop([m_x_1_temp], params) m_x_3_temp = x_3.fprop([m_x_2_temp], params) m_x_4_temp = x_4.fprop([m_x_3_temp], params) m_s_0 = rnn.get_init_state(m_batch_size) # Get the hidden-states, conditional mean, standard deviation, prior mean # and prior standard deviation of the latent variables at every time-step. ((m_s_temp, m_phi_mu_temp, m_phi_sig_temp, m_prior_mu_temp, m_prior_sig_temp, m_z_4_temp), m_updates) =\ theano.scan(fn=inner_fn, sequences=[m_x_4_temp], outputs_info=[m_s_0, None, None, None, None, None]) for k, v in m_updates.iteritems(): k.default_update = v # Get the inferred mean (X_t | Z_t) at every time-step of the validation # data. m_s_temp = concatenate([m_s_0[None, :, :], m_s_temp[:-1]], axis=0) m_theta_1_temp = theta_1.fprop([m_z_4_temp, m_s_temp], params) m_theta_2_temp = theta_2.fprop([m_theta_1_temp], params) m_theta_3_temp = theta_3.fprop([m_theta_2_temp], params) m_theta_4_temp = theta_4.fprop([m_theta_3_temp], params) m_theta_mu_temp = theta_mu.fprop([m_theta_4_temp], params) m_theta_sig_temp = theta_sig.fprop([m_theta_4_temp], params) # Compute the data log-likelihood + KL-divergence on the validation data.
m_kl_temp = KLGaussianGaussian(m_phi_mu_temp, m_phi_sig_temp, m_prior_mu_temp, m_prior_sig_temp) m_recon = Gaussian(m_x, m_theta_mu_temp, m_theta_sig_temp) m_recon_term = m_recon.mean() m_kl_term = m_kl_temp.mean() m_nll_upper_bound = m_recon_term + m_kl_term m_nll_upper_bound.name = 'nll_upper_bound' m_recon_term.name = 'recon_term' m_kl_term.name = 'kl_term' max_x = m_x.max() mean_x = m_x.mean() min_x = m_x.min() max_x.name = 'max_x' mean_x.name = 'mean_x' min_x.name = 'min_x' max_theta_mu = m_theta_mu_temp.max() mean_theta_mu = m_theta_mu_temp.mean() min_theta_mu = m_theta_mu_temp.min() max_theta_mu.name = 'max_theta_mu' mean_theta_mu.name = 'mean_theta_mu' min_theta_mu.name = 'min_theta_mu' max_theta_sig = m_theta_sig_temp.max() mean_theta_sig = m_theta_sig_temp.mean() min_theta_sig = m_theta_sig_temp.min() max_theta_sig.name = 'max_theta_sig' mean_theta_sig.name = 'mean_theta_sig' min_theta_sig.name = 'min_theta_sig' max_phi_sig = m_phi_sig_temp.max() mean_phi_sig = m_phi_sig_temp.mean() min_phi_sig = m_phi_sig_temp.min() max_phi_sig.name = 'max_phi_sig' mean_phi_sig.name = 'mean_phi_sig' min_phi_sig.name = 'min_phi_sig' max_prior_sig = m_prior_sig_temp.max() mean_prior_sig = m_prior_sig_temp.mean() min_prior_sig = m_prior_sig_temp.min() max_prior_sig.name = 'max_prior_sig' mean_prior_sig.name = 'mean_prior_sig' min_prior_sig.name = 'min_prior_sig' model.inputs = [x] model.params = params model.nodes = nodes model.set_updates(shared_updates) optimizer = Adam(lr=lr) monitor_fn = theano.function( inputs=[m_x], outputs=[ m_nll_upper_bound, m_recon_term, m_kl_term, max_phi_sig, mean_phi_sig, min_phi_sig, max_prior_sig, mean_prior_sig, min_prior_sig, max_theta_sig, mean_theta_sig, min_theta_sig, max_x, mean_x, min_x, max_theta_mu, mean_theta_mu, min_theta_mu ], on_unused_input='ignore') extension = [ GradientClipping(batch_size=batch_size, check_nan=1), EpochCount(epoch), Monitoring(freq=monitoring_freq, monitor_fn=monitor_fn, ddout=[ m_nll_upper_bound, m_recon_term, m_kl_term, max_phi_sig, mean_phi_sig, min_phi_sig, max_prior_sig, mean_prior_sig, min_prior_sig, max_theta_sig, mean_theta_sig, min_theta_sig, max_x, mean_x, min_x, max_theta_mu, mean_theta_mu, min_theta_mu ], data=[ Iterator(train_data, m_batch_size, start=0, end=112640), Iterator(valid_data, m_batch_size, start=2040064, end=2152704) ]), Picklize(freq=monitoring_freq, force_save_freq=force_saving_freq, path=save_path), EarlyStopping(freq=monitoring_freq, force_save_freq=force_saving_freq, path=save_path, channel=channel_name), WeightNorm() ] mainloop = Training(name=pkl_name, data=Iterator(train_data, batch_size, start=0, end=2040064), model=model, optimizer=optimizer, cost=nll_upper_bound, outputs=[nll_upper_bound], extension=extension) mainloop.run()
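# --- Illustrative sketch (not part of the training script above) ---
# The two terms of nll_upper_bound in plain numpy, assuming diagonal
# Gaussians everywhere. gauss_nll and kl_gauss_gauss are hypothetical
# stand-ins for the library's Gaussian(...) and KLGaussianGaussian(...),
# shown only to make the cost explicit.
import numpy as np

def gauss_nll(x, mu, sig):
    # Negative log-density of x under N(mu, sig**2), summed over features.
    return 0.5 * np.sum(np.log(2 * np.pi) + 2 * np.log(sig)
                        + ((x - mu) / sig) ** 2, axis=-1)

def kl_gauss_gauss(mu_q, sig_q, mu_p, sig_p):
    # Closed-form KL(N(mu_q, sig_q**2) || N(mu_p, sig_p**2)), diagonal case.
    return 0.5 * np.sum(2 * np.log(sig_p) - 2 * np.log(sig_q)
                        + (sig_q ** 2 + (mu_q - mu_p) ** 2) / sig_p ** 2
                        - 1.0, axis=-1)

# nll_upper_bound is then the mean over time and batch of
# gauss_nll(x, theta_mu, theta_sig) + kl_gauss_gauss(phi_mu, phi_sig, prior_mu, prior_sig).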
def main(args): #theano.optimizer='fast_compile' #theano.config.exception_verbosity='high' trial = int(args['trial']) pkl_name = 'dp_dis1-sch_%d' % trial channel_name = 'mae' data_path = args['data_path'] save_path = args['save_path'] #+'/gmm/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M") flgMSE = int(args['flgMSE']) period = int(args['period']) n_steps = int(args['n_steps']) stride_train = int(args['stride_train']) stride_test = n_steps # int(args['stride_test']) monitoring_freq = int(args['monitoring_freq']) epoch = int(args['epoch']) batch_size = int(args['batch_size']) x_dim = int(args['x_dim']) y_dim = int(args['y_dim']) flgAgg = int(args['flgAgg']) z_dim = int(args['z_dim']) rnn_dim = int(args['rnn_dim']) k = int(args['num_k']) #a mixture of K Gaussian functions lr = float(args['lr']) typeLoad = int(args['typeLoad']) debug = int(args['debug']) num_sequences_per_batch = int(args['numSequences']) #based on appliance target_inclusion_prob = float(args['target_inclusion_prob']) kSchedSamp = int(args['kSchedSamp']) print "trial no. %d" % trial print "batch size %d" % batch_size print "learning rate %f" % lr print "saving pkl file '%s'" % pkl_name print "to the save path '%s'" % save_path q_z_dim = 150 p_z_dim = 150 p_x_dim = 150 #250 x2s_dim = 100 #250 y2s_dim = 100 z2s_dim = 100 #150 target_dim = k #x_dim #(x_dim-1)*k model = Model() Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_redd( data_path, windows, appliances, numApps=flgAgg, period=period, n_steps=n_steps, stride_train=stride_train, stride_test=stride_test, trainPer=0.6, valPer=0.2, testPer=0.2, typeLoad=typeLoad, flgAggSumScaled=1, flgFilterZeros=1, seq_per_batch=num_sequences_per_batch, target_inclusion_prob=target_inclusion_prob) print(reader.meanTraining, reader.stdTraining) instancesPlot = { 0: [4], 2: [10] } #for now use hard-coded instancesPlot for kelly sampling train_data = Redd( name='train', prep='normalize', cond=True, # False #path=data_path, inputX=Xtrain, labels=ytrain) X_mean = train_data.X_mean X_std = train_data.X_std valid_data = Redd( name='valid', prep='normalize', cond=True, # False #path=data_path, X_mean=X_mean, X_std=X_std, inputX=Xval, labels=yval) test_data = Redd( name='test', prep='normalize', cond=True, # False #path=data_path, X_mean=X_mean, X_std=X_std, inputX=Xtest, labels=ytest) init_W = InitCell('rand') init_U = InitCell('ortho') init_b = InitCell('zeros') init_b_sig = InitCell('const', mean=0.6) x, mask, y, y_mask = train_data.theano_vars() scheduleSamplingMask = T.fvector('schedMask') x.name = 'x_original' if debug: x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32) temp = np.ones((15, batch_size), dtype=np.float32) temp[:, -2:] = 0.
mask.tag.test_value = temp x_1 = FullyConnectedLayer(name='x_1', parent=['x_t'], parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b) y_1 = FullyConnectedLayer(name='y_1', parent=['y_t'], parent_dim=[y_dim], nout=y2s_dim, unit='relu', init_W=init_W, init_b=init_b) z_1 = FullyConnectedLayer(name='z_1', parent=['z_t'], parent_dim=[z_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b) rnn = LSTM(name='rnn', parent=['x_1', 'z_1', 'y_1'], parent_dim=[x2s_dim, z2s_dim, y2s_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b) phi_1 = FullyConnectedLayer(name='phi_1', parent=['x_1', 's_tm1', 'y_1'], parent_dim=[x2s_dim, rnn_dim, y2s_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b) phi_mu = FullyConnectedLayer(name='phi_mu', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) phi_sig = FullyConnectedLayer(name='phi_sig', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) prior_1 = FullyConnectedLayer(name='prior_1', parent=['x_1', 's_tm1'], parent_dim=[x2s_dim, rnn_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b) prior_mu = FullyConnectedLayer(name='prior_mu', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) prior_sig = FullyConnectedLayer(name='prior_sig', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_1 = FullyConnectedLayer(name='theta_1', parent=['z_1', 's_tm1'], parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b) theta_mu = FullyConnectedLayer(name='theta_mu', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_sig = FullyConnectedLayer(name='theta_sig', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) coeff = FullyConnectedLayer(name='coeff', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) corr = FullyConnectedLayer(name='corr', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='tanh', init_W=init_W, init_b=init_b) binary = FullyConnectedLayer(name='binary', parent=['theta_1'], parent_dim=[p_x_dim], nout=1, unit='sigmoid', init_W=init_W, init_b=init_b) nodes = [ rnn, x_1, y_1, z_1, #dissag_pred, phi_1, phi_mu, phi_sig, prior_1, prior_mu, prior_sig, theta_1, theta_mu, theta_sig, coeff ] #, corr, binary params = OrderedDict() for node in nodes: if node.initialize() is not None: params.update(node.initialize()) params = init_tparams(params) s_0 = rnn.get_init_state(batch_size) x_1_temp = x_1.fprop([x], params) y_1_temp = y_1.fprop([y], params) def inner_fn_val(x_t, s_tm1): prior_1_t = prior_1.fprop([x_t, s_tm1], params) prior_mu_t = prior_mu.fprop([prior_1_t], params) prior_sig_t = prior_sig.fprop([prior_1_t], params) z_t = Gaussian_sample(prior_mu_t, prior_sig_t) z_1_t = z_1.fprop([z_t], params) theta_1_t = theta_1.fprop([z_1_t, s_tm1], params) theta_mu_t = theta_mu.fprop([theta_1_t], params) theta_sig_t = theta_sig.fprop([theta_1_t], params) coeff_t = coeff.fprop([theta_1_t], params) pred_t = GMM_sample(theta_mu_t, theta_sig_t, coeff_t) #Gaussian_sample(theta_mu_t, theta_sig_t) pred_1_t = y_1.fprop([pred_t], params) s_t = rnn.fprop([[x_t, z_1_t, pred_1_t], [s_tm1]], params) #y_pred = dissag_pred.fprop([s_t], params) return s_t, prior_mu_t, prior_sig_t, theta_mu_t,
theta_sig_t, coeff_t, pred_t #, y_pred #corr_temp, binary_temp ((s_temp_val, prior_mu_temp_val, prior_sig_temp_val, theta_mu_temp_val, theta_sig_temp_val, coeff_temp_val, prediction_val), updates_val) =\ theano.scan(fn=inner_fn_val, sequences=[x_1_temp], outputs_info=[s_0, None, None, None, None, None, None]) for k, v in updates_val.iteritems(): k.default_update = v s_temp_val = concatenate([s_0[None, :, :], s_temp_val[:-1]], axis=0) def inner_fn_train(x_t, y_t, schedSampMask, s_tm1): phi_1_t = phi_1.fprop([x_t, s_tm1, y_t], params) phi_mu_t = phi_mu.fprop([phi_1_t], params) phi_sig_t = phi_sig.fprop([phi_1_t], params) prior_1_t = prior_1.fprop([x_t, s_tm1], params) prior_mu_t = prior_mu.fprop([prior_1_t], params) prior_sig_t = prior_sig.fprop([prior_1_t], params) z_t = Gaussian_sample(phi_mu_t, phi_sig_t) z_1_t = z_1.fprop([z_t], params) theta_1_t = theta_1.fprop([z_1_t, s_tm1], params) theta_mu_t = theta_mu.fprop([theta_1_t], params) theta_sig_t = theta_sig.fprop([theta_1_t], params) coeff_t = coeff.fprop([theta_1_t], params) #corr_t = corr.fprop([theta_1_t], params) #binary_t = binary.fprop([theta_1_t], params) pred = GMM_sample(theta_mu_t, theta_sig_t, coeff_t) #Gaussian_sample(theta_mu_t, theta_sig_t) # schedSampMask is a symbolic per-step scalar inside scan, so select the teacher-forced input with T.switch rather than a Python if y_t_aux = y_1.fprop([pred], params) y_feed = T.switch(T.eq(schedSampMask, 1), y_t, y_t_aux) s_t = rnn.fprop([[x_t, z_1_t, y_feed], [s_tm1]], params) #y_pred = dissag_pred.fprop([s_t], params) return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, theta_mu_t, theta_sig_t, coeff_t, pred #, y_pred #corr_temp, binary_temp ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp, theta_mu_temp, theta_sig_temp, coeff_temp, prediction), updates) =\ theano.scan(fn=inner_fn_train, sequences=[x_1_temp, y_1_temp, scheduleSamplingMask], outputs_info=[s_0, None, None, None, None, None, None, None, None]) for k, v in updates.iteritems(): k.default_update = v #s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0) # would prepend s_0 and drop the last state so theta conditions on s_tm1 theta_mu_temp.name = 'theta_mu_temp' theta_sig_temp.name = 'theta_sig_temp' coeff_temp.name = 'coeff' x_shape = x.shape if (flgAgg == -1): prediction.name = 'x_reconstructed' mse = T.mean((prediction - x)**2) # CHECK RESHAPE with an assertion mae = T.mean(T.abs_(prediction - x)) mse.name = 'mse' mae.name = 'mae' pred_in = x.reshape((x_shape[0] * x_shape[1], -1)) else: prediction.name = 'pred_' + str(flgAgg) mse = T.mean((prediction - y)**2) # axis=None: mean over all elements mae = T.mean(T.abs_(prediction - y)) mse.name = 'mse' mae.name = 'mae' pred_in = y.reshape((y.shape[0] * y.shape[1], -1)) kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp) theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff_in = coeff_temp.reshape((x_shape[0] * x_shape[1], -1)) #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1)) #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1)) recon = GMM( pred_in, theta_mu_in, theta_sig_in, coeff_in ) # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in) recon = recon.reshape((x_shape[0], x_shape[1])) recon.name = 'gmm_out' recon_term = recon.sum(axis=0).mean() recon_term.name = 'recon_term' kl_term = kl_temp.sum(axis=0).mean() kl_term.name = 'kl_term' nll_upper_bound = recon_term + kl_term #+ mse if (flgMSE): nll_upper_bound = nll_upper_bound + mse nll_upper_bound.name = 'nll_upper_bound' ######################## TEST
(GENERATION) TIME prediction_val.name = 'generated__' + str(flgAgg) mse_val = T.mean((prediction_val - y)**2) # axis=None: mean over all elements mae_val = T.mean(T.abs_(prediction_val - y)) #y_unNormalize = (y * reader.stdTrain) + reader.meanTrain # accessing to just an scalar when loading y_dim=1 #prediction_valAux = (prediction_val * reader.stdTrain) + reader.meanTrain #mse_valUnNorm = T.mean((prediction_valAux - y_unNormalize)**2) # As axis = None is calculated for all #mae_valUnNorm = T.mean( T.abs_(prediction_valAux - y_unNormalize) ) mse_val.name = 'mse_val' mae_val.name = 'mae_val' pred_in_val = y.reshape((y.shape[0] * y.shape[1], -1)) theta_mu_in_val = theta_mu_temp_val.reshape((x_shape[0] * x_shape[1], -1)) theta_sig_in_val = theta_sig_temp_val.reshape((x_shape[0] * x_shape[1], -1)) coeff_in_val = coeff_temp_val.reshape((x_shape[0] * x_shape[1], -1)) recon_val = GMM( pred_in_val, theta_mu_in_val, theta_sig_in_val, coeff_in_val ) # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in) recon_val = recon_val.reshape((x_shape[0], x_shape[1])) recon_val.name = 'gmm_out_val' recon_term_val = recon_val.sum(axis=0).mean() recon_term_val.name = 'recon_term_val' model.inputs = [x, mask, y, y_mask, scheduleSamplingMask] model.params = params model.nodes = nodes optimizer = Adam(lr=lr) header = "epoch,log,kl,nll_upper_bound,mse,mae\n" extension = [ GradientClipping(batch_size=batch_size), EpochCount(epoch, save_path, header), Monitoring( freq=monitoring_freq, ddout=[nll_upper_bound, recon_term, kl_term, mse, mae, prediction], indexSep=5, instancesPlot=instancesPlot, #{0:[4,20],2:[5,10]},#, 80,150 data=[Iterator(valid_data, batch_size)], savedFolder=save_path), Picklize(freq=monitoring_freq, path=save_path), EarlyStopping(freq=monitoring_freq, path=save_path, channel=channel_name), WeightNorm() ] lr_iterations = {0: lr} mainloop = Training( name=pkl_name, data=Iterator(train_data, batch_size), model=model, optimizer=optimizer, cost=nll_upper_bound, outputs=[recon_term, kl_term, nll_upper_bound, mse, mae], n_steps=n_steps, extension=extension, lr_iterations=lr_iterations, k_speedOfconvergence=kSchedSamp) mainloop.run() data = Iterator(test_data, batch_size) test_fn = theano.function( inputs=[x, y], #[x, y], #givens={x:Xtest}, #on_unused_input='ignore', #z=( ,200,1) allow_input_downcast=True, outputs=[prediction_val, recon_term_val, mse_val, mae_val] #prediction_val, mse_val, mae_val , updates=updates_val #, allow_input_downcast=True, on_unused_input='ignore' ) testOutput = [] numBatchTest = 0 for batch in data: outputGeneration = test_fn(batch[0], batch[2]) #(20, 220, 1) testOutput.append(outputGeneration[1:]) plt.figure(4) plt.plot(np.transpose(outputGeneration[0], [1, 0, 2])[4]) plt.plot(np.transpose(batch[2], [1, 0, 2])[4]) plt.savefig( save_path + "/vrnn_dis_generated{}_RealAndPred_0-4".format(numBatchTest)) plt.clf() plt.figure(4) plt.plot(np.transpose(batch[0], [1, 0, 2])[4]) plt.savefig(save_path + "/vrnn_dis_generated{}_Realagg_0-4".format(numBatchTest)) plt.clf() numBatchTest += 1 testOutput = np.asarray(testOutput) print(testOutput.shape) recon_test = testOutput[:, 0].mean() mse_test = testOutput[:, 1].mean() mae_test = testOutput[:, 2].mean() #mseUnNorm_test = testOutput[:, 3].mean() #maeUnNorm_test = testOutput[:, 4].mean() fLog = open(save_path + '/output.csv', 'w') fLog.write(str(lr_iterations) + "\n") fLog.write(str(windows) + "\n") fLog.write("logTest,mseTest,maeTest\n") fLog.write("{},{},{}\n".format(recon_test, mse_test,
mae_test)) fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n") fLog.write("{},{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim, y2s_dim, z2s_dim)) header = "epoch,log,kl,mse,mae\n" fLog.write(header) for i, item in enumerate(mainloop.trainlog.monitor['recon_term']): f = mainloop.trainlog.monitor['epoch'][i] a = mainloop.trainlog.monitor['recon_term'][i] b = mainloop.trainlog.monitor['kl_term'][i] d = mainloop.trainlog.monitor['mse'][i] e = mainloop.trainlog.monitor['mae'][i] fLog.write("{:d},{:.2f},{:.2f},{:.3f},{:.3f}\n".format(f, a, b, d, e)) fLog.close()
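# --- Illustrative sketch (not part of the script above) ---
# One plausible way to build the scheduleSamplingMask fed to inner_fn_train:
# an inverse-sigmoid decay k / (k + exp(epoch / k)) on the probability of
# feeding ground truth, so early epochs use y_t and later epochs use the
# model's own GMM sample. The decay form and this helper are assumptions;
# only the per-time-step 1/0 contract comes from the code above.
import numpy as np

def sched_sampling_mask(n_steps, epoch, k):
    # Probability of teacher forcing at this epoch (k is kSchedSamp-like).
    p_truth = float(k) / (k + np.exp(epoch / float(k)))
    # 1 -> feed the ground-truth y_t, 0 -> feed back the model's prediction.
    return (np.random.rand(n_steps) < p_truth).astype(np.float32)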
cost.name = 'cost' recon_term.name = 'recon_term' kl_term.name = 'kl_term' recon_err = ((x - T.nnet.sigmoid(canvas_out[-1]))**2).mean() / x.std() recon_err.name = 'recon_err' model.inputs = [x] model.params = params model.nodes = nodes optimizer = Adam(lr=0.001) extension = [ GradientClipping(batch_size=batch_size), EpochCount(10000), Monitoring(freq=100, ddout=[cost, recon_term, kl_term, recon_err], data=[Iterator(data, batch_size)]), Picklize(freq=2000, path=savepath), EarlyStopping(freq=500, path=savepath) ] mainloop = Training(name='draw', data=Iterator(data, batch_size), model=model, optimizer=optimizer, cost=cost, outputs=[cost], extension=extension) mainloop.run()
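# --- Illustrative sketch (not part of the DRAW-style script above) ---
# numpy equivalent of the recon_err channel: squared error between the data
# and the sigmoid of the final canvas, normalized by the data's standard
# deviation so the channel stays comparable when inputs are rescaled.
import numpy as np

def recon_err(x, canvas_last):
    x_hat = 1.0 / (1.0 + np.exp(-canvas_last))  # sigmoid of the last canvas
    return ((x - x_hat) ** 2).mean() / x.std()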
def main(args): theano.config.optimizer = 'fast_compile' theano.config.exception_verbosity = 'high' trial = int(args['trial']) pkl_name = 'rnn_gmm_%d' % trial channel_name = 'valid_nll' data_path = args['data_path'] save_path = args['save_path'] flgMSE = int(args['flgMSE']) period = int(args['period']) n_steps = int(args['n_steps']) stride_train = int(args['stride_train']) stride_test = n_steps # int(args['stride_test']) monitoring_freq = int(args['monitoring_freq']) epoch = int(args['epoch']) batch_size = int(args['batch_size']) x_dim = int(args['x_dim']) z_dim = int(args['z_dim']) y_dim = int(args['y_dim']) flgAgg = int(args['flgAgg']) rnn_dim = int(args['rnn_dim']) k = int(args['num_k']) lr = float(args['lr']) debug = int(args['debug']) print "trial no. %d" % trial print "batch size %d" % batch_size print "learning rate %f" % lr print "saving pkl file '%s'" % pkl_name print "to the save path '%s'" % save_path x2s_dim = 50 #300 s2x_dim = 50 #300 target_dim = k #(x_dim-1)*k model = Model() Xtrain, ytrain, Xval, yval = fetch_ukdale(data_path, windows, appliances, numApps=flgAgg, period=period, n_steps=n_steps, stride_train=stride_train, stride_test=stride_test) print("Inside: ", Xtrain.shape, ytrain.shape, Xval.shape, yval.shape) train_data = UKdale( name='train', prep='normalize', cond=True, # False #path=data_path, inputX=Xtrain, labels=ytrain) X_mean = train_data.X_mean X_std = train_data.X_std valid_data = UKdale( name='valid', prep='normalize', cond=True, # False #path=data_path, X_mean=X_mean, X_std=X_std, inputX=Xval, labels=yval) init_W = InitCell('rand') init_U = InitCell('ortho') init_b = InitCell('zeros') init_b_sig = InitCell('const', mean=0.6) x, mask, y, y_mask = train_data.theano_vars() #mask, y_mask ''' if debug: x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32) temp = np.ones((15, batch_size), dtype=np.float32) temp[:, -2:] = 0.
mask.tag.test_value = temp ''' x_1 = FullyConnectedLayer(name='x_1', parent=['x_t'], parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b) rnn = LSTM(name='rnn', parent=['x_1'], parent_dim=[x2s_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b) theta_1 = FullyConnectedLayer(name='theta_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=s2x_dim, unit='relu', init_W=init_W, init_b=init_b) theta_mu = FullyConnectedLayer(name='theta_mu', parent=['theta_1'], parent_dim=[s2x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_sig = FullyConnectedLayer(name='theta_sig', parent=['theta_1'], parent_dim=[s2x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) coeff = FullyConnectedLayer(name='coeff', parent=['theta_1'], parent_dim=[s2x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) ''' corr = FullyConnectedLayer(name='corr', parent=['theta_1'], parent_dim=[s2x_dim], nout=k, unit='tanh', init_W=init_W, init_b=init_b) binary = FullyConnectedLayer(name='binary', parent=['theta_1'], parent_dim=[s2x_dim], nout=1, unit='sigmoid', init_W=init_W, init_b=init_b) ''' nodes = [rnn, x_1, theta_1, theta_mu, theta_sig, coeff] params = OrderedDict() for node in nodes: if node.initialize() is not None: params.update(node.initialize()) params = init_tparams(params) s_0 = rnn.get_init_state(batch_size) x_shape = x.shape x_1_temp = x_1.fprop([x], params) def inner_fn(x_t, s_tm1): s_t = rnn.fprop([[x_t], [s_tm1]], params) theta_1_t = theta_1.fprop([s_t], params) theta_mu_t = theta_mu.fprop([theta_1_t], params) theta_sig_t = theta_sig.fprop([theta_1_t], params) coeff_t = coeff.fprop([theta_1_t], params) pred = GMM_sample(theta_mu_t, theta_sig_t, coeff_t) return s_t, theta_mu_t, theta_sig_t, coeff_t, pred ((s_temp, theta_mu_temp, theta_sig_temp, coeff_temp, pred_temp), updates) = theano.scan(fn=inner_fn, sequences=[x_1_temp], outputs_info=[s_0, None, None, None, None]) for k, v in updates.iteritems(): k.default_update = v s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0) ''' theta_1_temp = theta_1.fprop([s_temp], params) theta_mu_temp = theta_mu.fprop([theta_1_temp], params) theta_sig_temp = theta_sig.fprop([theta_1_temp], params) coeff_temp = coeff.fprop([theta_1_temp], params) corr_temp = corr.fprop([theta_1_temp], params) binary_temp = binary.fprop([theta_1_temp], params) ''' x_in = x.reshape((x_shape[0] * x_shape[1], -1)) theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff_in = coeff_temp.reshape((x_shape[0] * x_shape[1], -1)) #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1)) #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1)) if (flgAgg == -1): pred_temp.name = 'x_reconstructed' mse = T.mean((pred_temp - x)**2) # CHECK RESHAPE with an assertion mae = T.mean(T.abs_(pred_temp - x)) mse.name = 'mse' mae.name = 'mae' pred_in = x.reshape((x_shape[0] * x_shape[1], -1)) else: #pred_temp = pred_temp.reshape((pred_temp.shape[0], pred_temp.shape[1])) pred_temp.name = 'pred_' + str(flgAgg) #y[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1)) mse = T.mean((pred_temp - y)**2) # CHECK RESHAPE with an assertion mae = T.mean(T.abs_(pred_temp - y)) mse.name = 'mse' mae.name = 'mae' pred_in = y.reshape((y.shape[0] * y.shape[1], -1)) recon = GMM(pred_in, theta_mu_in, theta_sig_in, coeff_in) #, binary_in recon.name = 'recon' recon = recon.reshape((y.shape[0], y.shape[1])) #recon = recon * y_mask #(200,
1000), (1000, 200) recon_term = recon.sum(axis=0).mean() recon_term.name = 'nll' max_x = x.max() mean_x = x.mean() min_x = x.min() max_x.name = 'max_x' mean_x.name = 'mean_x' min_x.name = 'min_x' max_theta_mu = theta_mu_in.max() mean_theta_mu = theta_mu_in.mean() min_theta_mu = theta_mu_in.min() max_theta_mu.name = 'max_theta_mu' mean_theta_mu.name = 'mean_theta_mu' min_theta_mu.name = 'min_theta_mu' ''' max_theta_sig = theta_sig_in.max() mean_theta_sig = theta_sig_in.mean() min_theta_sig = theta_sig_in.min() max_theta_sig.name = 'max_theta_sig' mean_theta_sig.name = 'mean_theta_sig' min_theta_sig.name = 'min_theta_sig' coeff_max = coeff_in.max() coeff_min = coeff_in.min() coeff_mean_max = coeff_in.mean(axis=0).max() coeff_mean_min = coeff_in.mean(axis=0).min() coeff_max.name = 'coeff_max' coeff_min.name = 'coeff_min' coeff_mean_max.name = 'coeff_mean_max' coeff_mean_min.name = 'coeff_mean_min' ''' model.inputs = [x, mask, y, y_mask] model.params = params model.nodes = nodes optimizer = Adam(lr=lr) extension = [ GradientClipping(batch_size=batch_size), EpochCount(epoch), Monitoring( freq=monitoring_freq, ddout=[ recon_term, mse, mae, #max_theta_sig, mean_theta_sig, min_theta_sig, max_x, mean_x, min_x, max_theta_mu, mean_theta_mu, min_theta_mu, #coeff_max, coeff_min, coeff_mean_max, coeff_mean_min,#16 theta_mu_temp, theta_sig_temp, pred_temp, coeff_temp, s_temp ], indexSep=9, indexDDoutPlot=[(0, theta_mu_temp), (2, pred_temp)], instancesPlot=[10, 100], #, 80,150 savedFolder=save_path, data=[Iterator(valid_data, batch_size)]), Picklize(freq=monitoring_freq, path=save_path), EarlyStopping(freq=monitoring_freq, path=save_path, channel=channel_name), WeightNorm() ] lr_iterations = {0: lr, 20: (lr / 10), 150: (lr / 100), 200: (lr / 1000)} mainloop = Training(name=pkl_name, data=Iterator(train_data, batch_size), model=model, optimizer=optimizer, cost=recon_term, outputs=[recon_term], extension=extension, lr_iterations=lr_iterations) mainloop.run() fLog = open(save_path + '/output.csv', 'w') fLog.write("log,mse,mae\n") for i, item in enumerate(mainloop.trainlog.monitor['nll']): a = mainloop.trainlog.monitor['nll'][i] d = mainloop.trainlog.monitor['mse'][i] e = mainloop.trainlog.monitor['mae'][i] fLog.write("{},{},{}\n".format(a, d, e))
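# --- Illustrative sketch (not part of the script above) ---
# What one GMM_sample step amounts to for the univariate case used here
# (target_dim = k): draw a component index from the softmax coefficients,
# then a reparameterized draw from that component's Gaussian. A stand-in
# for intuition, not the library implementation.
import numpy as np

def gmm_sample(mu, sig, coeff, rng=np.random):
    # mu, sig, coeff: (batch, k) arrays; returns a (batch, 1) sample.
    out = np.empty((mu.shape[0], 1), dtype=mu.dtype)
    for i in range(mu.shape[0]):
        j = rng.choice(mu.shape[1], p=coeff[i])        # mixture component
        out[i, 0] = mu[i, j] + sig[i, j] * rng.randn()  # Gaussian draw
    return out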
def main(args): theano.config.optimizer = 'fast_compile' theano.config.exception_verbosity = 'high' trial = int(args['trial']) pkl_name = 'vrnn_gauss_%d' % trial channel_name = 'valid_nll_upper_bound' data_path = args['data_path'] save_path = args['save_path'] period = int(args['period']) n_steps = int(args['n_steps']) stride_train = int(args['stride_train']) stride_test = int(args['stride_test']) monitoring_freq = int(args['monitoring_freq']) epoch = int(args['epoch']) batch_size = int(args['batch_size']) x_dim = int(args['x_dim']) z_dim = int(args['z_dim']) rnn_dim = int(args['rnn_dim']) lr = float(args['lr']) debug = int(args['debug']) print "trial no. %d" % trial print "batch size %d" % batch_size print "learning rate %f" % lr print "saving pkl file '%s'" % pkl_name print "to the save path '%s'" % save_path q_z_dim = 150 p_z_dim = 150 p_x_dim = 250 x2s_dim = 10 #250 z2s_dim = 10 #150 target_dim = x_dim #(x_dim-1) model = Model() train_data = UKdale( name='train', prep='none', #normalize cond=False, path=data_path, period=period, n_steps=n_steps, x_dim=x_dim, stride_train=stride_train, stride_test=stride_test) X_mean = train_data.X_mean X_std = train_data.X_std valid_data = UKdale( name='valid', prep='none', #normalize cond=False, path=data_path, X_mean=X_mean, X_std=X_std) init_W = InitCell('rand') init_U = InitCell('ortho') init_b = InitCell('zeros') init_b_sig = InitCell('const', mean=0.6) x, mask = train_data.theano_vars() if debug: x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32) temp = np.ones((15, batch_size), dtype=np.float32) temp[:, -2:] = 0. mask.tag.test_value = temp x_1 = FullyConnectedLayer( name='x_1', parent=['x_t'], #OrderedDict parent['x_t'] = x_dim parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b) z_1 = FullyConnectedLayer(name='z_1', parent=['z_t'], parent_dim=[z_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b) rnn = LSTM(name='rnn', parent=['x_1', 'z_1'], parent_dim=[x2s_dim, z2s_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b) phi_1 = FullyConnectedLayer( name='phi_1', ## encoder parent=['x_1', 's_tm1'], parent_dim=[x2s_dim, rnn_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b) phi_mu = FullyConnectedLayer(name='phi_mu', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) phi_sig = FullyConnectedLayer(name='phi_sig', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) prior_1 = FullyConnectedLayer(name='prior_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b) prior_mu = FullyConnectedLayer(name='prior_mu', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) prior_sig = FullyConnectedLayer(name='prior_sig', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_1 = FullyConnectedLayer( name='theta_1', ### decoder parent=['z_1', 's_tm1'], parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b) theta_mu = FullyConnectedLayer(name='theta_mu', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_sig = FullyConnectedLayer(name='theta_sig', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) corr = FullyConnectedLayer(
name='corr', ## rho parent=['theta_1'], parent_dim=[p_x_dim], nout=1, unit='tanh', init_W=init_W, init_b=init_b) binary = FullyConnectedLayer(name='binary', parent=['theta_1'], parent_dim=[p_x_dim], nout=1, unit='sigmoid', init_W=init_W, init_b=init_b) nodes = [ rnn, x_1, z_1, phi_1, phi_mu, phi_sig, prior_1, prior_mu, prior_sig, theta_1, theta_mu, theta_sig ] #, corr, binary params = OrderedDict() for node in nodes: if node.initialize() is not None: params.update( node.initialize() ) #Initialize values of the W matrices according to dim of parents params = init_tparams(params) s_0 = rnn.get_init_state(batch_size) x_1_temp = x_1.fprop([x], params) def inner_fn(x_t, s_tm1): phi_1_t = phi_1.fprop([x_t, s_tm1], params) phi_mu_t = phi_mu.fprop([phi_1_t], params) phi_sig_t = phi_sig.fprop([phi_1_t], params) prior_1_t = prior_1.fprop([s_tm1], params) prior_mu_t = prior_mu.fprop([prior_1_t], params) prior_sig_t = prior_sig.fprop([prior_1_t], params) z_t = Gaussian_sample(phi_mu_t, phi_sig_t) z_1_t = z_1.fprop([z_t], params) s_t = rnn.fprop([[x_t, z_1_t], [s_tm1]], params) return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_1_t ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp, z_1_temp), updates) =\ theano.scan(fn=inner_fn, sequences=[x_1_temp], #non_sequences unchanging variables #The tensor(s) to be looped over should be provided to scan using the sequence keyword argument outputs_info=[s_0, None, None, None, None, None])#Initialization occurs in outputs_info for k, v in updates.iteritems(): print("Update") k.default_update = v s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0) s_temp.name = 'h_1' #gisse z_1_temp.name = 'z_1' #gisse theta_1_temp = theta_1.fprop([z_1_temp, s_temp], params) theta_mu_temp = theta_mu.fprop([theta_1_temp], params) theta_mu_temp.name = 'theta_mu' theta_sig_temp = theta_sig.fprop([theta_1_temp], params) theta_sig_temp.name = 'theta_sig' #corr_temp = corr.fprop([theta_1_temp], params) #corr_temp.name = 'corr' #binary_temp = binary.fprop([theta_1_temp], params) #binary_temp.name = 'binary' kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp) x_shape = x.shape x_in = x.reshape((x_shape[0] * x_shape[1], -1)) theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1)) #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1)) #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1)) recon = Gaussian( x_in, theta_mu_in, theta_sig_in ) # BiGauss(x_in, theta_mu_in, theta_sig_in, corr_in, binary_in) # second term for the loss function recon = recon.reshape((x_shape[0], x_shape[1])) #recon = recon * mask recon_term = recon.sum(axis=0).mean() recon_term.name = 'recon_term' #kl_temp = kl_temp * mask kl_term = kl_temp.sum(axis=0).mean() kl_term.name = 'kl_term' nll_upper_bound = recon_term + kl_term nll_upper_bound.name = 'nll_upper_bound' max_x = x.max() mean_x = x.mean() min_x = x.min() max_x.name = 'max_x' mean_x.name = 'mean_x' min_x.name = 'min_x' max_theta_mu = theta_mu_in.max() mean_theta_mu = theta_mu_in.mean() min_theta_mu = theta_mu_in.min() max_theta_mu.name = 'max_theta_mu' mean_theta_mu.name = 'mean_theta_mu' min_theta_mu.name = 'min_theta_mu' max_theta_sig = theta_sig_in.max() mean_theta_sig = theta_sig_in.mean() min_theta_sig = theta_sig_in.min() max_theta_sig.name = 'max_theta_sig' mean_theta_sig.name = 'mean_theta_sig' min_theta_sig.name = 'min_theta_sig' max_phi_sig = phi_sig_temp.max() mean_phi_sig = 
phi_sig_temp.mean() min_phi_sig = phi_sig_temp.min() max_phi_sig.name = 'max_phi_sig' mean_phi_sig.name = 'mean_phi_sig' min_phi_sig.name = 'min_phi_sig' max_prior_sig = prior_sig_temp.max() mean_prior_sig = prior_sig_temp.mean() min_prior_sig = prior_sig_temp.min() max_prior_sig.name = 'max_prior_sig' mean_prior_sig.name = 'mean_prior_sig' min_prior_sig.name = 'min_prior_sig' prior_sig_output = prior_sig_temp prior_sig_output.name = 'prior_sig_o' phi_sig_output = phi_sig_temp phi_sig_output.name = 'phi_sig_o' model.inputs = [x, mask] model.params = params model.nodes = nodes optimizer = Adam(lr=lr) extension = [ GradientClipping(batch_size=batch_size), EpochCount(epoch), Monitoring( freq=monitoring_freq, ddout=[ nll_upper_bound, recon_term, kl_term, max_phi_sig, mean_phi_sig, min_phi_sig, max_prior_sig, mean_prior_sig, min_prior_sig, max_theta_sig, mean_theta_sig, min_theta_sig, max_x, mean_x, min_x, max_theta_mu, mean_theta_mu, min_theta_mu, #0-16 #binary_temp, corr_temp, theta_mu_temp, theta_sig_temp, #17-20 s_temp, z_1_temp #phi_sig_output,phi_sig_output ], ## added in order to explore the distributions indexSep=18, data=[Iterator(valid_data, batch_size)]), Picklize(freq=monitoring_freq, path=save_path), EarlyStopping(freq=monitoring_freq, path=save_path, channel=channel_name), WeightNorm() ] mainloop = Training(name=pkl_name, data=Iterator(train_data, batch_size), model=model, optimizer=optimizer, cost=nll_upper_bound, outputs=[nll_upper_bound], extension=extension) mainloop.run() results = mainloop.extension[2].lastResults seq = self.inference(results, n_steps) plot_lines_iamondb_example( X=seq ) #X, y=None, equal=True, show=False, save=False, save_name="tmp.png"):
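# --- Illustrative sketch (not part of the script above) ---
# The reparameterized draw behind Gaussian_sample: z = mu + sig * eps with
# eps ~ N(0, I), which is what keeps the sample differentiable w.r.t. mu
# and sig in the Theano graph. A numpy stand-in, not the library code.
import numpy as np

def gaussian_sample(mu, sig, rng=np.random):
    eps = rng.standard_normal(mu.shape).astype(mu.dtype)
    return mu + sig * eps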
def main(args): #theano.optimizer='fast_compile' #theano.config.exception_verbosity='high' trial = int(args['trial']) pkl_name = 'vrnn_gmm_%d' % trial channel_name = 'valid_nll_upper_bound' data_path = args['data_path'] save_path = args[ 'save_path'] #+'/gmm/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M") flgMSE = int(args['flgMSE']) period = int(args['period']) n_steps = int(args['n_steps']) stride_train = int(args['stride_train']) stride_test = n_steps # int(args['stride_test']) monitoring_freq = int(args['monitoring_freq']) epoch = int(args['epoch']) batch_size = int(args['batch_size']) x_dim = int(args['x_dim']) y_dim = int(args['y_dim']) flgAgg = int(args['flgAgg']) z_dim = int(args['z_dim']) rnn_dim = int(args['rnn_dim']) k = int(args['num_k']) #a mixture of K Gaussian functions lr = float(args['lr']) debug = int(args['debug']) num_sequences_per_batch = int(args['numSequences']) #based on appliance loadParam = args['loadAsKelly'] target_inclusion_prob = float(args['target_inclusion_prob']) loadAsKelly = True if (loadParam == 'N' or loadParam == 'n' or loadParam == 'no' or loadParam == 'NO' or loadParam == 'No'): loadAsKelly = False print "trial no. %d" % trial print "batch size %d" % batch_size print "learning rate %f" % lr print "saving pkl file '%s'" % pkl_name print "to the save path '%s'" % save_path q_z_dim = 100 #150 p_z_dim = 60 #150 p_x_dim = 20 #250 x2s_dim = 40 #250 z2s_dim = 40 #150 target_dim = k #x_dim #(x_dim-1)*k model = Model() Xtrain, ytrain, Xval, yval, reader = fetch_ukdale( data_path, windows, appliances, numApps=flgAgg, period=period, n_steps=n_steps, stride_train=stride_train, stride_test=stride_test, flgAggSumScaled=1, flgFilterZeros=1, isKelly=loadAsKelly, seq_per_batch=num_sequences_per_batch, target_inclusion_prob=target_inclusion_prob) instancesPlot = { 0: [4, 20], 2: [5, 10] } #for now use hard coded instancesPlot for kelly sampling if (not loadAsKelly): instancesPlot = reader.build_dict_instances_plot( listDates, batch_size, Xval.shape[0]) train_data = UKdale( name='train', prep='normalize', cond=True, # False #path=data_path, inputX=Xtrain, labels=ytrain) X_mean = train_data.X_mean X_std = train_data.X_std valid_data = UKdale( name='valid', prep='normalize', cond=True, # False #path=data_path, X_mean=X_mean, X_std=X_std, inputX=Xval, labels=yval) init_W = InitCell('rand') init_U = InitCell('ortho') init_b = InitCell('zeros') init_b_sig = InitCell('const', mean=0.6) x, mask, y, y_mask = train_data.theano_vars() x.name = 'x_original' if debug: x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32) temp = np.ones((15, batch_size), dtype=np.float32) temp[:, -2:] = 0. 
mask.tag.test_value = temp x_1 = FullyConnectedLayer(name='x_1', parent=['x_t'], parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b) z_1 = FullyConnectedLayer(name='z_1', parent=['z_t'], parent_dim=[z_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b) rnn = LSTM(name='rnn', parent=['x_1', 'z_1'], parent_dim=[x2s_dim, z2s_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b) ''' dissag_pred = FullyConnectedLayer(name='disag_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=num_apps, unit='relu', init_W=init_W, init_b=init_b) ''' phi_1 = FullyConnectedLayer(name='phi_1', parent=['x_1', 's_tm1'], parent_dim=[x2s_dim, rnn_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b) phi_mu = FullyConnectedLayer(name='phi_mu', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) phi_sig = FullyConnectedLayer(name='phi_sig', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) prior_1 = FullyConnectedLayer(name='prior_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b) prior_mu = FullyConnectedLayer(name='prior_mu', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) prior_sig = FullyConnectedLayer(name='prior_sig', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_1 = FullyConnectedLayer(name='theta_1', parent=['z_1', 's_tm1'], parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b) theta_mu = FullyConnectedLayer(name='theta_mu', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_sig = FullyConnectedLayer(name='theta_sig', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) coeff = FullyConnectedLayer(name='coeff', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) corr = FullyConnectedLayer(name='corr', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='tanh', init_W=init_W, init_b=init_b) binary = FullyConnectedLayer(name='binary', parent=['theta_1'], parent_dim=[p_x_dim], nout=1, unit='sigmoid', init_W=init_W, init_b=init_b) nodes = [ rnn, x_1, z_1, #dissag_pred, phi_1, phi_mu, phi_sig, prior_1, prior_mu, prior_sig, theta_1, theta_mu, theta_sig, coeff ] #, corr, binary params = OrderedDict() for node in nodes: if node.initialize() is not None: params.update(node.initialize()) params = init_tparams(params) s_0 = rnn.get_init_state(batch_size) x_1_temp = x_1.fprop([x], params) def inner_fn(x_t, s_tm1): phi_1_t = phi_1.fprop([x_t, s_tm1], params) phi_mu_t = phi_mu.fprop([phi_1_t], params) phi_sig_t = phi_sig.fprop([phi_1_t], params) prior_1_t = prior_1.fprop([s_tm1], params) prior_mu_t = prior_mu.fprop([prior_1_t], params) prior_sig_t = prior_sig.fprop([prior_1_t], params) z_t = Gaussian_sample(phi_mu_t, phi_sig_t) z_1_t = z_1.fprop([z_t], params) theta_1_t = theta_1.fprop([z_1_t, s_tm1], params) theta_mu_t = theta_mu.fprop([theta_1_t], params) theta_sig_t = theta_sig.fprop([theta_1_t], params) coeff_t = coeff.fprop([theta_1_t], params) #corr_t = corr.fprop([theta_1_t], params) #binary_t = binary.fprop([theta_1_t], params) pred = GMM_sample(theta_mu_t, theta_sig_t, coeff_t) #Gaussian_sample(theta_mu_t, theta_sig_t) s_t = rnn.fprop([[x_t, z_1_t], [s_tm1]], 
params) #y_pred = dissag_pred.fprop([s_t], params) return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_t, z_1_t, theta_1_t, theta_mu_t, theta_sig_t, coeff_t, pred #, y_pred #corr_temp, binary_temp ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp, z_t_temp, z_1_temp, theta_1_temp, theta_mu_temp, theta_sig_temp, coeff_temp, prediction), updates) =\ theano.scan(fn=inner_fn, sequences=[x_1_temp], outputs_info=[s_0, None, None, None, None, None, None, None, None, None, None, None]) for k, v in updates.iteritems(): k.default_update = v s_temp = concatenate( [s_0[None, :, :], s_temp[:-1]], axis=0 ) # prepend s_0 and drop the last state so theta conditions on s_tm1 ''' theta_1_temp = theta_1.fprop([z_1_temp, s_temp], params) theta_mu_temp = theta_mu.fprop([theta_1_temp], params) theta_sig_temp = theta_sig.fprop([theta_1_temp], params) coeff_temp = coeff.fprop([theta_1_temp], params) corr_temp = corr.fprop([theta_1_temp], params) binary_temp = binary.fprop([theta_1_temp], params) ''' s_temp.name = 'h_1' #gisse z_1_temp.name = 'z_1' #gisse z_t_temp.name = 'z' theta_mu_temp.name = 'theta_mu_temp' theta_sig_temp.name = 'theta_sig_temp' coeff_temp.name = 'coeff' #corr_temp.name = 'corr' #binary_temp.name = 'binary' x_shape = x.shape if (flgAgg == -1): prediction.name = 'x_reconstructed' mse = T.mean((prediction - x)**2) # CHECK RESHAPE with an assertion mae = T.mean(T.abs_(prediction - x)) mse.name = 'mse' mae.name = 'mae' pred_in = x.reshape((x_shape[0] * x_shape[1], -1)) else: prediction.name = 'pred_' + str(flgAgg) #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1) mse = T.mean((prediction - y)**2) # axis=None: mean over all elements mae = T.mean(T.abs_(prediction - y)) mse.name = 'mse' mae.name = 'mae' pred_in = y.reshape((y.shape[0] * y.shape[1], -1)) kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp) theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff_in = coeff_temp.reshape((x_shape[0] * x_shape[1], -1)) #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1)) #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1)) recon = GMM( pred_in, theta_mu_in, theta_sig_in, coeff_in ) # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in) recon = recon.reshape((x_shape[0], x_shape[1])) recon.name = 'gmm_out' #recon = recon * mask recon_term = recon.sum(axis=0).mean() recon_term.name = 'recon_term' #kl_temp = kl_temp * mask kl_term = kl_temp.sum(axis=0).mean() kl_term.name = 'kl_term' nll_upper_bound = recon_term + kl_term #+ mse if (flgMSE): nll_upper_bound = nll_upper_bound + mse nll_upper_bound.name = 'nll_upper_bound' max_x = x.max() mean_x = x.mean() min_x = x.min() max_x.name = 'max_x' mean_x.name = 'mean_x' min_x.name = 'min_x' max_theta_mu = theta_mu_in.max() mean_theta_mu = theta_mu_in.mean() min_theta_mu = theta_mu_in.min() max_theta_mu.name = 'max_theta_mu' mean_theta_mu.name = 'mean_theta_mu' min_theta_mu.name = 'min_theta_mu' max_theta_sig = theta_sig_in.max() mean_theta_sig = theta_sig_in.mean() min_theta_sig = theta_sig_in.min() max_theta_sig.name = 'max_theta_sig' mean_theta_sig.name = 'mean_theta_sig' min_theta_sig.name = 'min_theta_sig' coeff_max = coeff_in.max() coeff_min = coeff_in.min() coeff_mean_max = coeff_in.mean(axis=0).max() coeff_mean_min = coeff_in.mean(axis=0).min() coeff_max.name = 'coeff_max' coeff_min.name = 'coeff_min' coeff_mean_max.name = 'coeff_mean_max' coeff_mean_min.name = 'coeff_mean_min' max_phi_sig = phi_sig_temp.max() mean_phi_sig = phi_sig_temp.mean() min_phi_sig = phi_sig_temp.min() max_phi_sig.name = 'max_phi_sig' mean_phi_sig.name = 'mean_phi_sig' min_phi_sig.name = 'min_phi_sig' max_prior_sig = prior_sig_temp.max() mean_prior_sig = prior_sig_temp.mean() min_prior_sig = prior_sig_temp.min() max_prior_sig.name = 'max_prior_sig' mean_prior_sig.name = 'mean_prior_sig' min_prior_sig.name = 'min_prior_sig' model.inputs = [x, mask, y, y_mask] model.params = params model.nodes = nodes optimizer = Adam(lr=lr) extension = [ GradientClipping(batch_size=batch_size), EpochCount(epoch), Monitoring( freq=monitoring_freq, ddout=[ nll_upper_bound, recon_term, kl_term, mse, mae, theta_mu_temp, theta_sig_temp, z_t_temp, prediction, #corr_temp, binary_temp, s_temp, z_1_temp ], indexSep=5, indexDDoutPlot=[(0, theta_mu_temp), (2, z_t_temp), (3, prediction)], instancesPlot=instancesPlot, #{0:[4,20],2:[5,10]},#, 80,150 data=[Iterator(valid_data, batch_size)], savedFolder=save_path), Picklize(freq=monitoring_freq, path=save_path), EarlyStopping(freq=monitoring_freq, path=save_path, channel=channel_name), WeightNorm() ] lr_iterations = {0: lr, 15: (lr / 10), 70: (lr / 100)} mainloop = Training(name=pkl_name, data=Iterator(train_data, batch_size), model=model, optimizer=optimizer, cost=nll_upper_bound, outputs=[nll_upper_bound], extension=extension, lr_iterations=lr_iterations) mainloop.run() fLog = open(save_path + '/output.csv', 'w') fLog.write("log,kl,nll_upper_bound,mse,mae\n") for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']): a = mainloop.trainlog.monitor['recon_term'][i] b = mainloop.trainlog.monitor['kl_term'][i] c = mainloop.trainlog.monitor['nll_upper_bound'][i] d = mainloop.trainlog.monitor['mse'][i] e = mainloop.trainlog.monitor['mae'][i] fLog.write("{},{},{},{},{}\n".format(a, b, c, d, e)) fLog.close()
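# --- Illustrative sketch (not part of the script above) ---
# A minimal reading of the lr_iterations schedule dicts passed to Training:
# keys are the epochs at which the learning rate changes, and the rate in
# effect is the one attached to the largest key <= the current epoch. How
# the Training loop actually consumes the dict is an assumption, not shown
# in this file.
def lr_at(epoch, lr_iterations):
    past = [e for e in lr_iterations if e <= epoch]
    return lr_iterations[max(past)] if past else None

# e.g. lr_at(20, {0: 0.001, 15: 0.0001, 70: 0.00001}) -> 0.0001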
def main(args): #theano.optimizer='fast_compile' #theano.config.exception_verbosity='high' trial = int(args['trial']) pkl_name = 'vrnn_gmm_%d' % trial channel_name = 'nll_upper_bound' data_path = args['data_path'] save_path = args[ 'save_path'] #+'/gmm/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M") flgMSE = int(args['flgMSE']) period = int(args['period']) n_steps = int(args['n_steps']) stride_train = int(args['stride_train']) stride_test = n_steps # int(args['stride_test']) monitoring_freq = int(args['monitoring_freq']) epoch = int(args['epoch']) batch_size = int(args['batch_size']) x_dim = int(args['x_dim']) y_dim = int(args['y_dim']) flgAgg = int(args['flgAgg']) z_dim = int(args['z_dim']) rnn_dim = int(args['rnn_dim']) k = int(args['num_k']) #a mixture of K Gaussian functions lr = float(args['lr']) typeLoad = int(args['typeLoad']) debug = int(args['debug']) n_steps_val = n_steps print "trial no. %d" % trial print "batch size %d" % batch_size print "learning rate %f" % lr print "saving pkl file '%s'" % pkl_name print "to the save path '%s'" % save_path print str(windows) q_z_dim = 180 p_z_dim = 180 p_x_dim = 200 x2s_dim = 100 z2s_dim = 150 target_dim = k #x_dim #(x_dim-1)*k model = Model() Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_dataport( data_path, windows, appliances, numApps=flgAgg, period=period, n_steps=n_steps, stride_train=stride_train, stride_test=stride_test, trainPer=0.6, valPer=0.2, testPer=0.2, typeLoad=typeLoad, flgAggSumScaled=1, flgFilterZeros=1) instancesPlot = { 0: [5] } #for now use hard coded instancesPlot for kelly sampling train_data = Dataport( name='train', prep='normalize', cond=True, # False #path=data_path, inputX=ytrain, labels=Xtrain) X_mean = train_data.X_mean X_std = train_data.X_std valid_data = Dataport( name='valid', prep='normalize', cond=True, # False #path=data_path, X_mean=X_mean, X_std=X_std, inputX=yval, labels=Xval) init_W = InitCell('rand') init_U = InitCell('ortho') init_b = InitCell('zeros') init_b_sig = InitCell('const', mean=0.6) x, mask, y, y_mask = train_data.theano_vars() scheduleSamplingMask = T.fvector('schedMask') x.name = 'x_original' if debug: x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32) temp = np.ones((15, batch_size), dtype=np.float32) temp[:, -2:] = 0. 
mask.tag.test_value = temp x_1 = FullyConnectedLayer(name='x_1', parent=['x_t'], parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b) z_1 = FullyConnectedLayer(name='z_1', parent=['z_t'], parent_dim=[z_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b) rnn = LSTM(name='rnn', parent=['x_1', 'z_1'], parent_dim=[x2s_dim, z2s_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b) ''' dissag_pred = FullyConnectedLayer(name='disag_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=num_apps, unit='relu', init_W=init_W, init_b=init_b) ''' phi_1 = FullyConnectedLayer(name='phi_1', parent=['x_1', 's_tm1'], parent_dim=[x2s_dim, rnn_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b) phi_mu = FullyConnectedLayer(name='phi_mu', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) phi_sig = FullyConnectedLayer(name='phi_sig', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) prior_1 = FullyConnectedLayer(name='prior_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b) prior_mu = FullyConnectedLayer(name='prior_mu', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) prior_sig = FullyConnectedLayer(name='prior_sig', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_1 = FullyConnectedLayer(name='theta_1', parent=['z_1', 's_tm1'], parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b) theta_mu = FullyConnectedLayer(name='theta_mu', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_sig = FullyConnectedLayer(name='theta_sig', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) coeff = FullyConnectedLayer(name='coeff', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) corr = FullyConnectedLayer(name='corr', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='tanh', init_W=init_W, init_b=init_b) binary = FullyConnectedLayer(name='binary', parent=['theta_1'], parent_dim=[p_x_dim], nout=1, unit='sigmoid', init_W=init_W, init_b=init_b) nodes = [ rnn, x_1, z_1, #dissag_pred, phi_1, phi_mu, phi_sig, prior_1, prior_mu, prior_sig, theta_1, theta_mu, theta_sig, coeff ] #, corr, binary params = OrderedDict() for node in nodes: if node.initialize() is not None: params.update(node.initialize()) params = init_tparams(params) s_0 = rnn.get_init_state(batch_size) x_1_temp = x_1.fprop([x], params) def inner_val_fn(s_tm1): ''' phi_1_t = phi_1.fprop([x_t, s_tm1], params) phi_mu_t = phi_mu.fprop([phi_1_t], params) phi_sig_t = phi_sig.fprop([phi_1_t], params) ''' prior_1_t = prior_1.fprop([s_tm1], params) prior_mu_t = prior_mu.fprop([prior_1_t], params) prior_sig_t = prior_sig.fprop([prior_1_t], params) z_t = Gaussian_sample(prior_mu_t, prior_sig_t) z_1_t = z_1.fprop([z_t], params) theta_1_t = theta_1.fprop([z_1_t, s_tm1], params) theta_mu_t = theta_mu.fprop([theta_1_t], params) theta_sig_t = theta_sig.fprop([theta_1_t], params) coeff_t = coeff.fprop([theta_1_t], params) pred_t = GMM_sample(theta_mu_t, theta_sig_t, coeff_t) #Gaussian_sample(theta_mu_t, theta_sig_t) pred_1_t = x_1.fprop([pred_t], params) s_t = rnn.fprop([[pred_1_t, z_1_t], [s_tm1]], params) return s_t, pred_t, z_t, 
theta_1_t, theta_mu_t, theta_sig_t, coeff_t # prior_mu_temp_val, prior_sig_temp_val ((s_temp_val, prediction_val, z_t_temp_val, theta_1_temp_val, theta_mu_temp_val, theta_sig_temp_val, coeff_temp_val), updates_val) =\ theano.scan(fn=inner_val_fn , n_steps=n_steps_val, #already 1 subtracted if doing next step outputs_info=[s_0, None, None, None, None, None, None]) for k, v in updates_val.iteritems(): k.default_update = v def inner_fn(x_t, s_tm1): phi_1_t = phi_1.fprop([x_t, s_tm1], params) phi_mu_t = phi_mu.fprop([phi_1_t], params) phi_sig_t = phi_sig.fprop([phi_1_t], params) prior_1_t = prior_1.fprop([s_tm1], params) prior_mu_t = prior_mu.fprop([prior_1_t], params) prior_sig_t = prior_sig.fprop([prior_1_t], params) z_t = Gaussian_sample(phi_mu_t, phi_sig_t) z_1_t = z_1.fprop([z_t], params) theta_1_t = theta_1.fprop([z_1_t, s_tm1], params) theta_mu_t = theta_mu.fprop([theta_1_t], params) theta_sig_t = theta_sig.fprop([theta_1_t], params) coeff_t = coeff.fprop([theta_1_t], params) #corr_t = corr.fprop([theta_1_t], params) #binary_t = binary.fprop([theta_1_t], params) pred = GMM_sample(theta_mu_t, theta_sig_t, coeff_t) #Gaussian_sample(theta_mu_t, theta_sig_t) s_t = rnn.fprop([[x_t, z_1_t], [s_tm1]], params) #y_pred = dissag_pred.fprop([s_t], params) return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_t, z_1_t, theta_1_t, theta_mu_t, theta_sig_t, coeff_t, pred #, y_pred #corr_temp, binary_temp ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,z_t_temp, z_1_temp, theta_1_temp, theta_mu_temp, theta_sig_temp, coeff_temp, prediction), updates) =\ theano.scan(fn=inner_fn, sequences=[x_1_temp ], outputs_info=[s_0, None, None, None, None, None, None, None, None, None, None, None]) for k, v in updates.iteritems(): k.default_update = v s_temp = concatenate( [s_0[None, :, :], s_temp[:-1]], axis=0 ) # seems like this is for creating an additional dimension to s_0 ''' theta_1_temp = theta_1.fprop([z_1_temp, s_temp], params) theta_mu_temp = theta_mu.fprop([theta_1_temp], params) theta_sig_temp = theta_sig.fprop([theta_1_temp], params) coeff_temp = coeff.fprop([theta_1_temp], params) corr_temp = corr.fprop([theta_1_temp], params) binary_temp = binary.fprop([theta_1_temp], params) ''' s_temp.name = 'h' #gisse z_1_temp.name = 'z2' #gisse z_t_temp.name = 'z' theta_mu_temp.name = 'mu' theta_sig_temp.name = 'sig' coeff_temp.name = 'coeff' prediction.name = 'Prediction-' + str(appliances[flgAgg][:-1]) mse = T.mean((prediction - x)**2) # As axis = None is calculated for all mae = T.mean(T.abs_(prediction - x)) mse.name = 'mse' mae.name = 'mae' x_in = x.reshape((batch_size * n_steps, -1)) kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp) x_shape = x.shape theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff_in = coeff_temp.reshape((x_shape[0] * x_shape[1], -1)) #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1)) #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1)) recon = GMM( x_in, theta_mu_in, theta_sig_in, coeff_in ) # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in) recon = recon.reshape((x_shape[0], x_shape[1])) recon.name = 'gmm_out' #recon = recon * mask recon_term = recon.sum(axis=0).mean() recon_term.name = 'recon_term' #kl_temp = kl_temp * mask kl_term = kl_temp.sum(axis=0).mean() kl_term.name = 'kl_term' nll_upper_bound = recon_term + kl_term #+ mse if (flgMSE): nll_upper_bound = nll_upper_bound + mse 
nll_upper_bound.name = 'nll_upper_bound' ############## TEST ############### theta_mu_in_val = theta_mu_temp_val.reshape((batch_size * n_steps, -1)) theta_sig_in_val = theta_sig_temp_val.reshape((batch_size * n_steps, -1)) coeff_in_val = coeff_temp_val.reshape((batch_size * n_steps, -1)) pred_in = prediction_val.reshape((batch_size * n_steps, -1)) recon_val = GMM( pred_in, theta_mu_in_val, theta_sig_in_val, coeff_in_val ) # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in) recon_val = recon_val.reshape((batch_size, n_steps)) recon_val.name = 'gmm_out_val' model.inputs = [x, mask, y, y_mask, scheduleSamplingMask] model.params = params model.nodes = nodes optimizer = Adam(lr=lr) header = "epoch,log,kl,nll_upper_bound,mse,mae\n" extension = [ GradientClipping(batch_size=batch_size), EpochCount(epoch, save_path, header), Monitoring( freq=monitoring_freq, ddout=[nll_upper_bound, recon_term, kl_term, mse, mae, prediction], indexSep=5, indexDDoutPlot=[(0, theta_mu_temp), (2, z_t_temp), (3, prediction)], instancesPlot=instancesPlot, #{0:[4,20],2:[5,10]},#, 80,150 data=[Iterator(valid_data, batch_size)], savedFolder=save_path), Picklize(freq=monitoring_freq, path=save_path), EarlyStopping(freq=monitoring_freq, path=save_path, channel=channel_name), WeightNorm() ] lr_iterations = { 0: lr, 30: (lr / 10) } #, 150:(lr/10), 270:(lr/100), 370:(lr/1000) mainloop = Training(name=pkl_name, data=Iterator(train_data, batch_size), model=model, optimizer=optimizer, cost=nll_upper_bound, outputs=[nll_upper_bound], n_steps=n_steps, extension=extension, lr_iterations=lr_iterations) mainloop.run() test_fn = theano.function( inputs=[], outputs=[prediction_val, recon_val], updates= updates_val #, allow_input_downcast=True, on_unused_input='ignore' ) outputGeneration = test_fn() #{0:[4,20], 2:[5,10]} ''' plt.figure(1) plt.plot(np.transpose(outputGeneration[0],[1,0,2])[5]) plt.savefig(save_path+"/vrnn_dis_generated_z_0-4.ps") plt.figure(2) plt.plot(np.transpose(outputGeneration[1],[1,0,2])[5]) plt.savefig(save_path+"/vrnn_dis_generated_s_0-4.ps") plt.figure(3) plt.plot(np.transpose(outputGeneration[2],[1,0,2])[5]) plt.savefig(save_path+"/vrnn_dis_generated_theta_0-4.ps") ''' plt.figure(1) plt.plot(np.transpose(outputGeneration[0], [1, 0, 2])[2]) plt.savefig(save_path + "/vrnn_dis_generated_pred_0-2.ps") plt.figure(2) plt.plot(np.transpose(outputGeneration[0], [1, 0, 2])[10]) plt.savefig(save_path + "/vrnn_dis_generated_pred_0-10.ps") plt.figure(3) plt.plot(np.transpose(outputGeneration[0], [1, 0, 2])[15]) plt.savefig(save_path + "/vrnn_dis_generated_pred_0-15.ps") testLogLike = np.asarray(outputGeneration[1]).mean() fLog = open(save_path + '/output.csv', 'w') fLog.write(str(lr_iterations) + "\n") fLog.write(str(windows) + "\n") fLog.write("Test-log-likelihood\n") fLog.write("{}\n".format(testLogLike)) fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,z2s_dim\n") fLog.write("{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim, z2s_dim)) fLog.write("epoch,log,kl,nll_upper_bound,mse,mae\n") for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']): f = mainloop.trainlog.monitor['epoch'][i] a = mainloop.trainlog.monitor['recon_term'][i] b = mainloop.trainlog.monitor['kl_term'][i] c = mainloop.trainlog.monitor['nll_upper_bound'][i] d = mainloop.trainlog.monitor['mse'][i] e = mainloop.trainlog.monitor['mae'][i] fLog.write("{:d},{:.2f},{:.2f},{:.2f},{:.3f},{:.3f}\n".format( f, a, b, c, d, e)) fLog.close() f = open(save_path + '/outputRealGeneration.pkl', 'wb') cPickle.dump(outputGeneration, 
                 f, -1)
    f.close()
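# A minimal NumPy sketch of what the sampling helpers used above are assumed
# to do: Gaussian_sample draws via the reparameterisation trick, and
# GMM_sample first picks a mixture component from the softmax coefficients.
# The real helpers are Theano ops; the flattened (x_dim, k) parameter layout
# below is an illustrative assumption.
import numpy as np

def gaussian_sample_np(mu, sig):
    # z = mu + sig * eps, with eps ~ N(0, I): the reparameterisation trick.
    return mu + sig * np.random.randn(*mu.shape)

def gmm_sample_np(mu, sig, coeff):
    # mu, sig: (batch, k * x_dim) flattened component parameters;
    # coeff: (batch, k) softmax mixture weights, assumed to sum to one.
    batch, k = coeff.shape
    x_dim = mu.shape[1] // k
    mu = mu.reshape(batch, x_dim, k)
    sig = sig.reshape(batch, x_dim, k)
    out = np.empty((batch, x_dim))
    for i in range(batch):
        j = np.random.choice(k, p=coeff[i])  # pick a mixture component
        out[i] = gaussian_sample_np(mu[i, :, j], sig[i, :, j])
    return out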
cost.name = 'cross_entropy'
err.name = 'error_rate'

model.inputs = [x, y]
model._params = params
model.nodes = nodes

# Define your optimizer: Momentum (Nesterov), RMSProp, Adam
optimizer = RMSProp(lr=0.001)

extension = [
    GradientClipping(),
    EpochCount(40),
    Monitoring(freq=100,
               ddout=[cost, err],
               data=[Iterator(train_data, batch_size),
                     Iterator(valid_data, batch_size)]),
    Picklize(freq=200, path=save_path)
]

mainloop = Training(name='toy_mnist',
                    data=Iterator(train_data, batch_size),
                    model=model,
                    optimizer=optimizer,
                    cost=cost,
                    outputs=[cost, err],
                    extension=extension)
mainloop.run()
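# For reference, a minimal NumPy sketch of the RMSProp update used by the toy
# script above. This is the standard formulation; the library's exact decay
# and epsilon defaults are assumptions here.
import numpy as np

def rmsprop_step(param, grad, cache, lr=0.001, decay=0.9, eps=1e-8):
    # Keep a running average of squared gradients, scale the step by it.
    cache = decay * cache + (1.0 - decay) * grad ** 2
    param = param - lr * grad / (np.sqrt(cache) + eps)
    return param, cache

# Usage: one update of a 3-parameter vector.
w = np.zeros(3)
c = np.zeros(3)
w, c = rmsprop_step(w, np.array([0.5, -1.0, 2.0]), c)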
def main(args): theano.optimizer = 'fast_compile' #theano.config.exception_verbosity='high' trial = int(args['trial']) pkl_name = 'vrnn_gmm_%d' % trial channel_name = 'valid_nll_upper_bound' data_path = args['data_path'] save_path = args[ 'save_path'] #+'/aggVSdisag_distrib/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M") period = int(args['period']) n_steps = int(args['n_steps']) stride_train = int(args['stride_train']) stride_test = n_steps flgMSE = int(args['flgMSE']) monitoring_freq = int(args['monitoring_freq']) epoch = int(args['epoch']) batch_size = int(args['batch_size']) x_dim = int(args['x_dim']) y_dim = int(args['y_dim']) z_dim = int(args['z_dim']) rnn_dim = int(args['rnn_dim']) k = int(args['num_k']) #a mixture of K Gaussian functions lr = float(args['lr']) origLR = lr debug = int(args['debug']) print "trial no. %d" % trial print "batch size %d" % batch_size print "learning rate %f" % lr print "saving pkl file '%s'" % pkl_name print "to the save path '%s'" % save_path q_z_dim = 20 #150 p_z_dim = 20 #150 p_x_dim = 20 #250 x2s_dim = 20 #250 z2s_dim = 20 #150 target_dim = k # As different appliances are separeted in theta_mu1, theta_mu2, etc... each one is just created from k different Gaussians model = Model() Xtrain, ytrain, Xval, yval, reader = fetch_ukdale( data_path, windows, appliances, numApps=-1, period=period, n_steps=n_steps, stride_train=stride_train, stride_test=stride_test, flgAggSumScaled=1, flgFilterZeros=1) instancesPlot = reader.build_dict_instances_plot(listDates, batch_size, Xval.shape[0]) train_data = UKdale( name='train', prep='normalize', cond=True, # False #path=data_path, inputX=Xtrain, labels=ytrain) X_mean = train_data.X_mean X_std = train_data.X_std valid_data = UKdale( name='valid', prep='normalize', cond=True, # False #path=data_path, X_mean=X_mean, X_std=X_std, inputX=Xval, labels=yval) init_W = InitCell('rand') init_U = InitCell('ortho') init_b = InitCell('zeros') init_b_sig = InitCell('const', mean=0.6) x, mask, y, y_mask = train_data.theano_vars() x.name = 'x_original' x_input = x #[:,:-1,:] y_input = y #[:,1:,:] if debug: x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32) temp = np.ones((15, batch_size), dtype=np.float32) temp[:, -2:] = 0. 
mask.tag.test_value = temp x_1 = FullyConnectedLayer(name='x_1', parent=['x_t'], parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b) z_1 = FullyConnectedLayer(name='z_1', parent=['z_t'], parent_dim=[z_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b) rnn = LSTM(name='rnn', parent=['x_1', 'z_1'], parent_dim=[x2s_dim, z2s_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b) phi_1 = FullyConnectedLayer(name='phi_1', parent=['x_1', 's_tm1'], parent_dim=[x2s_dim, rnn_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b) phi_mu = FullyConnectedLayer(name='phi_mu', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) phi_sig = FullyConnectedLayer(name='phi_sig', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) prior_1 = FullyConnectedLayer(name='prior_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b) prior_mu = FullyConnectedLayer(name='prior_mu', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) prior_sig = FullyConnectedLayer(name='prior_sig', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_1 = FullyConnectedLayer(name='theta_1', parent=['z_1', 's_tm1'], parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b) theta_mu1 = FullyConnectedLayer(name='theta_mu1', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_mu2 = FullyConnectedLayer(name='theta_mu2', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_mu3 = FullyConnectedLayer(name='theta_mu3', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_mu4 = FullyConnectedLayer(name='theta_mu4', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_mu5 = FullyConnectedLayer(name='theta_mu5', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_sig1 = FullyConnectedLayer(name='theta_sig1', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_sig2 = FullyConnectedLayer(name='theta_sig2', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_sig3 = FullyConnectedLayer(name='theta_sig3', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_sig4 = FullyConnectedLayer(name='theta_sig4', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_sig5 = FullyConnectedLayer(name='theta_sig5', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) coeff1 = FullyConnectedLayer(name='coeff1', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) coeff2 = FullyConnectedLayer(name='coeff2', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) coeff3 = FullyConnectedLayer(name='coeff3', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, 
init_b=init_b) coeff4 = FullyConnectedLayer(name='coeff4', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) coeff5 = FullyConnectedLayer(name='coeff5', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) corr = FullyConnectedLayer(name='corr', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='tanh', init_W=init_W, init_b=init_b) binary = FullyConnectedLayer(name='binary', parent=['theta_1'], parent_dim=[p_x_dim], nout=1, unit='sigmoid', init_W=init_W, init_b=init_b) nodes = [ rnn, x_1, z_1, #dissag_pred, phi_1, phi_mu, phi_sig, prior_1, prior_mu, prior_sig, theta_1, theta_mu1, theta_mu2, theta_mu3, theta_mu4, theta_mu5, theta_sig1, theta_sig2, theta_sig3, theta_sig4, theta_sig5, coeff1, coeff2, coeff3, coeff4, coeff5 ] params = OrderedDict() for node in nodes: if node.initialize() is not None: params.update(node.initialize()) params = init_tparams(params) s_0 = rnn.get_init_state(batch_size) x_1_temp = x_1.fprop([x_input], params) def inner_fn(x_t, s_tm1): phi_1_t = phi_1.fprop([x_t, s_tm1], params) phi_mu_t = phi_mu.fprop([phi_1_t], params) phi_sig_t = phi_sig.fprop([phi_1_t], params) prior_1_t = prior_1.fprop([s_tm1], params) prior_mu_t = prior_mu.fprop([prior_1_t], params) prior_sig_t = prior_sig.fprop([prior_1_t], params) z_t = Gaussian_sample( phi_mu_t, phi_sig_t ) #in the original code it is gaussian. GMM is for the generation z_1_t = z_1.fprop([z_t], params) theta_1_t = theta_1.fprop([z_1_t, s_tm1], params) theta_mu1_t = theta_mu1.fprop([theta_1_t], params) theta_sig1_t = theta_sig1.fprop([theta_1_t], params) coeff1_t = coeff1.fprop([theta_1_t], params) theta_mu2_t = theta_mu2.fprop([theta_1_t], params) theta_sig2_t = theta_sig2.fprop([theta_1_t], params) coeff2_t = coeff2.fprop([theta_1_t], params) theta_mu3_t = theta_mu3.fprop([theta_1_t], params) theta_sig3_t = theta_sig3.fprop([theta_1_t], params) coeff3_t = coeff3.fprop([theta_1_t], params) theta_mu4_t = theta_mu4.fprop([theta_1_t], params) theta_sig4_t = theta_sig4.fprop([theta_1_t], params) coeff4_t = coeff4.fprop([theta_1_t], params) theta_mu5_t = theta_mu5.fprop([theta_1_t], params) theta_sig5_t = theta_sig5.fprop([theta_1_t], params) coeff5_t = coeff5.fprop([theta_1_t], params) y_pred1 = GMM_sampleY( theta_mu1_t, theta_sig1_t, coeff1_t) #Gaussian_sample(theta_mu_t, theta_sig_t) y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t) y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t) y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t) y_pred5 = GMM_sampleY(theta_mu5_t, theta_sig5_t, coeff5_t) #y_pred = [GMM_sampleY(theta_mu_t[i], theta_sig_t[i], coeff_t[i]) for i in range(y_dim)]#T.stack([y_pred1,y_pred2],axis = 0 ) s_t = rnn.fprop([[x_t, z_1_t], [s_tm1]], params) #y_pred = dissag_pred.fprop([s_t], params) return (s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_t, z_1_t, theta_1_t, theta_mu1_t, theta_sig1_t, coeff1_t, theta_mu2_t, theta_sig2_t, coeff2_t, theta_mu3_t, theta_sig3_t, coeff3_t, theta_mu4_t, theta_sig4_t, coeff4_t, theta_mu5_t, theta_sig5_t, coeff5_t, y_pred1, y_pred2, y_pred3, y_pred4, y_pred5) #corr_temp, binary_temp ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,z_t_temp, z_1_temp, theta_1_temp, theta_mu1_temp, theta_sig1_temp, coeff1_temp, theta_mu2_temp, theta_sig2_temp, coeff2_temp, theta_mu3_temp, theta_sig3_temp, coeff3_temp, theta_mu4_temp, theta_sig4_temp, coeff4_temp, theta_mu5_temp, theta_sig5_temp, coeff5_temp, y_pred1_temp, y_pred2_temp, y_pred3_temp, 
y_pred4_temp, y_pred5_temp), updates) =\ theano.scan(fn=inner_fn, sequences=[x_1_temp], outputs_info=[s_0, None, None, None, None, None, None, None, None,None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]) for k, v in updates.iteritems(): k.default_update = v s_temp = concatenate( [s_0[None, :, :], s_temp[:-1]], axis=0 ) # seems like this is for creating an additional dimension to s_0 s_temp.name = 'h_1' #gisse z_1_temp.name = 'z_1' #gisse z_t_temp.name = 'z' theta_mu1_temp.name = 'theta_mu1' theta_sig1_temp.name = 'theta_sig1' coeff1_temp.name = 'coeff1' theta_mu2_temp.name = 'theta_mu2' theta_sig2_temp.name = 'theta_sig2' coeff2_temp.name = 'coeff2' theta_mu3_temp.name = 'theta_mu3' theta_sig3_temp.name = 'theta_sig3' coeff3_temp.name = 'coeff3' theta_mu4_temp.name = 'theta_mu4' theta_sig4_temp.name = 'theta_sig4' coeff4_temp.name = 'coeff4' theta_mu5_temp.name = 'theta_mu5' theta_sig5_temp.name = 'theta_sig5' coeff5_temp.name = 'coeff5' #corr_temp.name = 'corr' #binary_temp.name = 'binary' #x_pred_temp.name = 'x_reconstructed' y_pred1_temp.name = 'disaggregation1' y_pred2_temp.name = 'disaggregation2' y_pred3_temp.name = 'disaggregation3' y_pred4_temp.name = 'disaggregation4' y_pred5_temp.name = 'disaggregation5' ''' y_pred_temp = T.stack([y_pred1_temp, y_pred2_temp, y_pred3_temp, y_pred4_temp], axis=2) y_pred_temp = y_pred_temp.flatten(3)# because of the stack, i guess, there's a 4th dimension created mse = T.mean((y_pred_temp - y.reshape((y.shape[0], y.shape[1],-1)))**2) # cause mse can be 26000 ''' #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1) mse1 = T.mean((y_pred1_temp - y_input[:, :, 0].reshape( (y_input.shape[0], y_input.shape[1], 1)))**2) # As axis = None is calculated for all mae1 = T.mean( T.abs_(y_pred1_temp - y_input[:, :, 0].reshape((y_input.shape[0], y_input.shape[1], 1)))) mse1.name = 'mse1' mae1.name = 'mae1' mse2 = T.mean((y_pred2_temp - y_input[:, :, 1].reshape( (y_input.shape[0], y_input.shape[1], 1)))**2) # As axis = None is calculated for all mae2 = T.mean( T.abs_(y_pred2_temp - y_input[:, :, 1].reshape((y_input.shape[0], y_input.shape[1], 1)))) mse2.name = 'mse2' mae2.name = 'mae2' mse3 = T.mean((y_pred3_temp - y_input[:, :, 2].reshape( (y_input.shape[0], y_input.shape[1], 1)))**2) # As axis = None is calculated for all mae3 = T.mean( T.abs_(y_pred3_temp - y_input[:, :, 2].reshape((y_input.shape[0], y_input.shape[1], 1)))) mse3.name = 'mse3' mae3.name = 'mae3' mse4 = T.mean((y_pred4_temp - y_input[:, :, 3].reshape( (y_input.shape[0], y_input.shape[1], 1)))**2) # As axis = None is calculated for all mae4 = T.mean( T.abs_(y_pred4_temp - y_input[:, :, 3].reshape((y_input.shape[0], y_input.shape[1], 1)))) mse4.name = 'mse4' mae4.name = 'mae4' mse5 = T.mean((y_pred5_temp - y_input[:, :, 4].reshape( (y_input.shape[0], y_input.shape[1], 1)))**2) # As axis = None is calculated for all mae5 = T.mean( T.abs_(y_pred5_temp - y_input[:, :, 4].reshape((y_input.shape[0], y_input.shape[1], 1)))) mse5.name = 'mse5' mae5.name = 'mae5' kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp) x_shape = x_input.shape y_shape = y_input.shape x_in = x.reshape((x_shape[0] * x_shape[1], -1)) y_in = y.reshape((y_shape[0] * y_shape[1], -1)) theta_mu1_in = theta_mu1_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig1_in = theta_sig1_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff1_in = coeff1_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_mu2_in = theta_mu2_temp.reshape((x_shape[0] * 
x_shape[1], -1)) theta_sig2_in = theta_sig2_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff2_in = coeff2_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_mu3_in = theta_mu3_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig3_in = theta_sig3_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff3_in = coeff3_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_mu4_in = theta_mu4_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig4_in = theta_sig4_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff4_in = coeff4_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_mu5_in = theta_mu5_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig5_in = theta_sig5_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff5_in = coeff5_temp.reshape((x_shape[0] * x_shape[1], -1)) #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1)) #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1)) recon = GMMdisag5( y_in, theta_mu1_in, theta_sig1_in, coeff1_in, theta_mu2_in, theta_sig2_in, coeff2_in, theta_mu3_in, theta_sig3_in, coeff3_in, theta_mu4_in, theta_sig4_in, coeff4_in, theta_mu5_in, theta_sig5_in, coeff5_in ) # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in) recon = recon.reshape((x_shape[0], x_shape[1])) recon.name = 'gmm_out' ''' recon5 = GMM(y_in[:,4, None], theta_mu5_in, theta_sig5_in, coeff5_in) recon5 = recon.reshape((x_shape[0], x_shape[1])) ''' recon_term = recon.sum(axis=0).mean() recon_term = recon.sum(axis=0).mean() recon_term.name = 'recon_term' #kl_temp = kl_temp * mask kl_term = kl_temp.sum(axis=0).mean() kl_term.name = 'kl_term' #nll_upper_bound_0 = recon_term + kl_term #nll_upper_bound_0.name = 'nll_upper_bound_0' if (flgMSE == 1): nll_upper_bound = recon_term + kl_term + mse1 + mse2 + mse3 + mse4 + mse5 else: nll_upper_bound = recon_term + kl_term nll_upper_bound.name = 'nll_upper_bound' ''' max_x = x.max() mean_x = x.mean() min_x = x.min() max_x.name = 'max_x' mean_x.name = 'mean_x' min_x.name = 'min_x' max_theta_mu = theta_mu_in.max() mean_theta_mu = theta_mu_in.mean() min_theta_mu = theta_mu_in.min() max_theta_mu.name = 'max_theta_mu' mean_theta_mu.name = 'mean_theta_mu' min_theta_mu.name = 'min_theta_mu' max_theta_sig = theta_sig_in.max() mean_theta_sig = theta_sig_in.mean() min_theta_sig = theta_sig_in.min() max_theta_sig.name = 'max_theta_sig' mean_theta_sig.name = 'mean_theta_sig' min_theta_sig.name = 'min_theta_sig' coeff_max = coeff_in.max() coeff_min = coeff_in.min() coeff_mean_max = coeff_in.mean(axis=0).max() coeff_mean_min = coeff_in.mean(axis=0).min() coeff_max.name = 'coeff_max' coeff_min.name = 'coeff_min' coeff_mean_max.name = 'coeff_mean_max' coeff_mean_min.name = 'coeff_mean_min' max_phi_sig = phi_sig_temp.max() mean_phi_sig = phi_sig_temp.mean() min_phi_sig = phi_sig_temp.min() max_phi_sig.name = 'max_phi_sig' mean_phi_sig.name = 'mean_phi_sig' min_phi_sig.name = 'min_phi_sig' max_prior_sig = prior_sig_temp.max() mean_prior_sig = prior_sig_temp.mean() min_prior_sig = prior_sig_temp.min() max_prior_sig.name = 'max_prior_sig' mean_prior_sig.name = 'mean_prior_sig' min_prior_sig.name = 'min_prior_sig' ''' model.inputs = [x, mask, y, y_mask] model.params = params model.nodes = nodes optimizer = Adam(lr=lr) extension = [ GradientClipping(batch_size=batch_size), EpochCount(epoch), Monitoring( freq=monitoring_freq, ddout=[ nll_upper_bound, recon_term, kl_term, mse1, mse2, mse3, mse4, mse5, mae1, mae2, mae3, mae4, mae5, y_pred1_temp, y_pred2_temp, y_pred3_temp, y_pred4_temp, y_pred5_temp ], indexSep=13, indexDDoutPlot=[13], # adding indexes of ddout for the 
plotting  #, (6,y_pred_temp)
            instancesPlot=instancesPlot,  # 0-150
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq, path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    lr_iterations = {0: lr, 150: (lr / 10)}

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=nll_upper_bound,
                        outputs=[nll_upper_bound],
                        extension=extension,
                        lr_iterations=lr_iterations)
    mainloop.run()

    fLog = open(save_path + '/output.csv', 'w')
    # Log the learning-rate schedule that was actually used for this run.
    fLog.write(str(lr_iterations) + "\n")
    fLog.write(
        "log,kl,nll_upper_bound,mse1,mae1,mse2,mae2,mse3,mae3,mse4,mae4,mse5,mae5\n"
    )
    for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']):
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        c = mainloop.trainlog.monitor['nll_upper_bound'][i]
        d = mainloop.trainlog.monitor['mse1'][i]
        e = mainloop.trainlog.monitor['mae1'][i]
        f = mainloop.trainlog.monitor['mse2'][i]
        g = mainloop.trainlog.monitor['mae2'][i]
        h = mainloop.trainlog.monitor['mse3'][i]
        j = mainloop.trainlog.monitor['mae3'][i]
        k = mainloop.trainlog.monitor['mse4'][i]
        l = mainloop.trainlog.monitor['mae4'][i]
        m = mainloop.trainlog.monitor['mse5'][i]
        n = mainloop.trainlog.monitor['mae5'][i]
        fLog.write("{},{},{},{},{},{},{},{},{},{},{},{},{}\n".format(
            a, b, c, d, e, f, g, h, j, k, l, m, n))
    fLog.close()
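# GMMdisag5 above is assumed to accumulate, over the five appliances, the
# negative log-likelihood of each target under its k-component diagonal
# Gaussian mixture. A NumPy sketch of the per-appliance term, using
# log-sum-exp for numerical stability (function name and shapes are
# illustrative assumptions, not the library implementation):
import numpy as np

def gmm_neg_log_likelihood(y, mu, sig, coeff):
    # y: (n, 1) target; mu, sig: (n, k) component params; coeff: (n, k).
    log_comp = (-0.5 * ((y - mu) / sig) ** 2
                - np.log(sig) - 0.5 * np.log(2 * np.pi))
    log_mix = log_comp + np.log(coeff)
    m = log_mix.max(axis=1, keepdims=True)  # log-sum-exp trick
    ll = m.squeeze(1) + np.log(np.exp(log_mix - m).sum(axis=1))
    return -ll  # (n,) negative log-likelihood per sample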
def main(args): theano.optimizer = 'fast_compile' #theano.config.exception_verbosity='high' trial = int(args['trial']) pkl_name = 'dp_disall-sch_%d' % trial channel_name = 'mae' data_path = args['data_path'] save_path = args[ 'save_path'] #+'/aggVSdisag_distrib/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M") period = int(args['period']) n_steps = int(args['n_steps']) stride_train = int(args['stride_train']) stride_test = int(args['stride_test']) loadType = int(args['loadType']) flgMSE = int(args['flgMSE']) monitoring_freq = int(args['monitoring_freq']) epoch = int(args['epoch']) batch_size = int(args['batch_size']) x_dim = int(args['x_dim']) y_dim = int(args['y_dim']) z_dim = int(args['z_dim']) rnn_dim = int(args['rnn_dim']) k = int(args['num_k']) #a mixture of K Gaussian functions lr = float(args['lr']) origLR = lr debug = int(args['debug']) kSchedSamp = int(args['kSchedSamp']) typeActivFunc = args['typeActivFunc'] print "trial no. %d" % trial print "batch size %d" % batch_size print "learning rate %f" % lr print "saving pkl file '%s'" % pkl_name print "to the save path '%s'" % save_path print(str(windows)) q_z_dim = 500 p_z_dim = 500 p_x_dim = 500 x2s_dim = 200 y2s_dim = 200 z2s_dim = 200 lr_iterations = {0: lr} target_dim = k # As different appliances are separeted in theta_mu1, theta_mu2, etc... each one is just created from k different Gaussians model = Model() Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_redd( data_path, windows, appliances, numApps=-1, period=period, n_steps=n_steps, stride_train=stride_train, stride_test=stride_test, trainPer=0.5, valPer=0.25, testPer=0.25, typeLoad=loadType, flgAggSumScaled=1, flgFilterZeros=1) print(Xtrain.shape, Xval.shape, Xtest.shape, ytrain.shape, yval.shape, ytest.shape) print("Mean ", reader.meanTraining) print("Std", reader.stdTraining) instancesPlot = {0: [4]} train_data = Redd( name='train', prep='normalize', cond=True, # False #path=data_path, inputX=Xtrain, labels=ytrain) X_mean = train_data.X_mean X_std = train_data.X_std valid_data = Redd( name='valid', prep='normalize', cond=True, # False #path=data_path, X_mean=X_mean, X_std=X_std, inputX=Xval, labels=yval) test_data = Redd( name='valid', prep='normalize', cond=True, # False #path=data_path, X_mean=X_mean, X_std=X_std, inputX=Xtest, labels=ytest) init_W = InitCell('rand') init_U = InitCell('ortho') init_b = InitCell('zeros') init_b_sig = InitCell('const', mean=0.6) x, mask, y, y_mask = train_data.theano_vars() scheduleSamplingMask = T.fvector('schedMask') x.name = 'x_original' if debug: x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32) temp = np.ones((15, batch_size), dtype=np.float32) temp[:, -2:] = 0. 
mask.tag.test_value = temp x_1 = FullyConnectedLayer(name='x_1', parent=['x_t'], parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b) y_1 = FullyConnectedLayer(name='y_1', parent=['y_t'], parent_dim=[y_dim], nout=y2s_dim, unit='relu', init_W=init_W, init_b=init_b) z_1 = FullyConnectedLayer(name='z_1', parent=['z_t'], parent_dim=[z_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b) rnn = LSTM(name='rnn', parent=['x_1', 'z_1', 'y_1'], parent_dim=[x2s_dim, z2s_dim, y2s_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b) phi_1 = FullyConnectedLayer(name='phi_1', parent=['x_1', 's_tm1', 'y_1'], parent_dim=[x2s_dim, rnn_dim, y2s_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b) phi_mu = FullyConnectedLayer(name='phi_mu', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) phi_sig = FullyConnectedLayer(name='phi_sig', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) prior_1 = FullyConnectedLayer(name='prior_1', parent=['x_1', 's_tm1'], parent_dim=[x2s_dim, rnn_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b) prior_mu = FullyConnectedLayer(name='prior_mu', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) prior_sig = FullyConnectedLayer(name='prior_sig', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_1 = FullyConnectedLayer(name='theta_1', parent=['z_1', 's_tm1'], parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b) theta_mu1 = FullyConnectedLayer(name='theta_mu1', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit=typeActivFunc, init_W=init_W, init_b=init_b) if (y_dim > 1): theta_mu2 = FullyConnectedLayer(name='theta_mu2', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit=typeActivFunc, init_W=init_W, init_b=init_b) if (y_dim > 2): theta_mu3 = FullyConnectedLayer(name='theta_mu3', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit=typeActivFunc, init_W=init_W, init_b=init_b) if (y_dim > 3): theta_mu4 = FullyConnectedLayer(name='theta_mu4', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit=typeActivFunc, init_W=init_W, init_b=init_b) theta_sig1 = FullyConnectedLayer(name='theta_sig1', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) if (y_dim > 1): theta_sig2 = FullyConnectedLayer(name='theta_sig2', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) if (y_dim > 2): theta_sig3 = FullyConnectedLayer(name='theta_sig3', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) if (y_dim > 3): theta_sig4 = FullyConnectedLayer(name='theta_sig4', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) coeff1 = FullyConnectedLayer(name='coeff1', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) if (y_dim > 1): coeff2 = FullyConnectedLayer(name='coeff2', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) if (y_dim > 2): coeff3 = FullyConnectedLayer(name='coeff3', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, 
init_b=init_b) if (y_dim > 3): coeff4 = FullyConnectedLayer(name='coeff4', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) corr = FullyConnectedLayer(name='corr', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='tanh', init_W=init_W, init_b=init_b) binary = FullyConnectedLayer(name='binary', parent=['theta_1'], parent_dim=[p_x_dim], nout=1, unit='sigmoid', init_W=init_W, init_b=init_b) nodes = [ rnn, x_1, y_1, z_1, #dissag_pred, phi_1, phi_mu, phi_sig, prior_1, prior_mu, prior_sig, theta_1, theta_mu1, theta_sig1, coeff1 ] dynamicOutput = [None, None, None, None, None, None, None, None] if (y_dim > 1): nodes = nodes + [theta_mu2, theta_sig2, coeff2] dynamicOutput = dynamicOutput + [None, None, None, None ] #mu, sig, coef and pred if (y_dim > 2): nodes = nodes + [theta_mu3, theta_sig3, coeff3] dynamicOutput = dynamicOutput + [None, None, None, None] if (y_dim > 3): nodes = nodes + [theta_mu4, theta_sig4, coeff4] dynamicOutput = dynamicOutput + [None, None, None, None] params = OrderedDict() for node in nodes: if node.initialize() is not None: params.update(node.initialize()) params = init_tparams(params) s_0 = rnn.get_init_state(batch_size) x_1_temp = x_1.fprop([x], params) y_1_temp = y_1.fprop([y], params) output_fn = [s_0] + dynamicOutput output_fn_val = [s_0] + dynamicOutput[2:] print(len(output_fn), len(output_fn_val)) def inner_fn(x_t, y_t, scheduleSamplingMask, s_tm1): phi_1_t = phi_1.fprop([x_t, s_tm1, y_t], params) phi_mu_t = phi_mu.fprop([phi_1_t], params) phi_sig_t = phi_sig.fprop([phi_1_t], params) prior_1_t = prior_1.fprop([x_t, s_tm1], params) prior_mu_t = prior_mu.fprop([prior_1_t], params) prior_sig_t = prior_sig.fprop([prior_1_t], params) z_t = Gaussian_sample( phi_mu_t, phi_sig_t ) #in the original code it is gaussian. 
GMM is for the generation z_1_t = z_1.fprop([z_t], params) theta_1_t = theta_1.fprop([z_1_t, s_tm1], params) theta_mu1_t = theta_mu1.fprop([theta_1_t], params) theta_sig1_t = theta_sig1.fprop([theta_1_t], params) coeff1_t = coeff1.fprop([theta_1_t], params) ## prediction 1 y_pred = GMM_sampleY( theta_mu1_t, theta_sig1_t, coeff1_t) #Gaussian_sample(theta_mu_t, theta_sig_t) tupleMulti = phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred if (y_dim > 1): theta_mu2_t = theta_mu2.fprop([theta_1_t], params) theta_sig2_t = theta_sig2.fprop([theta_1_t], params) coeff2_t = coeff2.fprop([theta_1_t], params) y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t) y_pred = T.concatenate([y_pred, y_pred2], axis=1) tupleMulti = tupleMulti + (theta_mu2_t, theta_sig2_t, coeff2_t, y_pred2) if (y_dim > 2): theta_mu3_t = theta_mu3.fprop([theta_1_t], params) theta_sig3_t = theta_sig3.fprop([theta_1_t], params) coeff3_t = coeff3.fprop([theta_1_t], params) y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t) y_pred = T.concatenate([y_pred, y_pred3], axis=1) tupleMulti = tupleMulti + (theta_mu3_t, theta_sig3_t, coeff3_t, y_pred3) if (y_dim > 3): theta_mu4_t = theta_mu4.fprop([theta_1_t], params) theta_sig4_t = theta_sig4.fprop([theta_1_t], params) coeff4_t = coeff4.fprop([theta_1_t], params) y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t) y_pred = T.concatenate([y_pred, y_pred4], axis=1) tupleMulti = tupleMulti + (theta_mu4_t, theta_sig4_t, coeff4_t, y_pred4) #s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params) if (scheduleSamplingMask == 1): s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params) else: y_t_aux = y_1.fprop([y_pred], params) s_t = rnn.fprop([[x_t, z_1_t, y_t_aux], [s_tm1]], params) return (s_t, ) + tupleMulti #corr_temp, binary_temp (otherResults, updates) = theano.scan( fn=inner_fn, sequences=[x_1_temp, y_1_temp, scheduleSamplingMask], outputs_info=output_fn) #[s_0, (None)] s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,\ theta_mu1_temp, theta_sig1_temp, coeff1_temp, y_pred1_temp = otherResults[:9] restResults = otherResults[9:] for k, v in updates.iteritems(): k.default_update = v #s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)# seems like this is for creating an additional dimension to s_0 theta_mu1_temp.name = 'theta_mu1' theta_sig1_temp.name = 'theta_sig1' coeff1_temp.name = 'coeff1' y_pred1_temp.name = 'disaggregation1' #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1) mse1 = T.mean((y_pred1_temp - y[:, :, 0].reshape( (y.shape[0], y.shape[1], 1)))**2) # As axis = None is calculated for all mae1 = T.mean( T.abs_(y_pred1_temp - y[:, :, 0].reshape((y.shape[0], y.shape[1], 1)))) mse1.name = 'mse1' mae1.name = 'mae1' kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp) x_shape = x.shape y_shape = y.shape x_in = x.reshape((x_shape[0] * x_shape[1], -1)) y_in = y.reshape((y_shape[0] * y_shape[1], -1)) theta_mu1_in = theta_mu1_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig1_in = theta_sig1_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff1_in = coeff1_temp.reshape((x_shape[0] * x_shape[1], -1)) ddoutMSEA = [] ddoutYpreds = [y_pred1_temp] indexSepDynamic = 7 #plus two totalmse, totalmae totaMAE = T.copy(mae1) totaMSE = T.copy(mse1) mse2 = T.zeros((1, )) mae2 = T.zeros((1, )) mse3 = T.zeros((1, )) mae3 = T.zeros((1, )) mse4 = T.zeros((1, )) mae4 = T.zeros((1, )) if (y_dim > 1): theta_mu2_temp, theta_sig2_temp, coeff2_temp, y_pred2_temp = restResults[: 4] restResults = 
restResults[4:] theta_mu2_temp.name = 'theta_mu2' theta_sig2_temp.name = 'theta_sig2' coeff2_temp.name = 'coeff2' y_pred2_temp.name = 'disaggregation2' mse2 = T.mean((y_pred2_temp - y[:, :, 1].reshape( (y.shape[0], y.shape[1], 1)))**2) # As axis = None is calculated for all mae2 = T.mean( T.abs_(y_pred2_temp - y[:, :, 1].reshape((y.shape[0], y.shape[1], 1)))) mse2.name = 'mse2' mae2.name = 'mae2' theta_mu2_in = theta_mu2_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig2_in = theta_sig2_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff2_in = coeff2_temp.reshape((x_shape[0] * x_shape[1], -1)) argsGMM = theta_mu2_in, theta_sig2_in, coeff2_in ddoutMSEA = ddoutMSEA + [mse2, mae2] ddoutYpreds = ddoutYpreds + [y_pred2_temp] #totaMSE+=mse2 indexSepDynamic += 2 if (y_dim > 2): theta_mu3_temp, theta_sig3_temp, coeff3_temp, y_pred3_temp = restResults[: 4] restResults = restResults[4:] theta_mu3_temp.name = 'theta_mu3' theta_sig3_temp.name = 'theta_sig3' coeff3_temp.name = 'coeff3' y_pred3_temp.name = 'disaggregation3' mse3 = T.mean((y_pred3_temp - y[:, :, 2].reshape( (y.shape[0], y.shape[1], 1)))**2) # As axis = None is calculated for all mae3 = T.mean( T.abs_(y_pred3_temp - y[:, :, 2].reshape((y.shape[0], y.shape[1], 1)))) mse3.name = 'mse3' mae3.name = 'mae3' theta_mu3_in = theta_mu3_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig3_in = theta_sig3_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff3_in = coeff3_temp.reshape((x_shape[0] * x_shape[1], -1)) argsGMM = argsGMM + (theta_mu3_in, theta_sig3_in, coeff3_in) ddoutMSEA = ddoutMSEA + [mse3, mae3] ddoutYpreds = ddoutYpreds + [y_pred3_temp] #totaMSE+=mse3 indexSepDynamic += 2 if (y_dim > 3): theta_mu4_temp, theta_sig4_temp, coeff4_temp, y_pred4_temp = restResults[: 4] restResults = restResults[4:] theta_mu4_temp.name = 'theta_mu4' theta_sig4_temp.name = 'theta_sig4' coeff4_temp.name = 'coeff4' y_pred4_temp.name = 'disaggregation4' mse4 = T.mean((y_pred4_temp - y[:, :, 3].reshape( (y.shape[0], y.shape[1], 1)))**2) # As axis = None is calculated for all mae4 = T.mean( T.abs_(y_pred4_temp - y[:, :, 3].reshape((y.shape[0], y.shape[1], 1)))) mse4.name = 'mse4' mae4.name = 'mae4' theta_mu4_in = theta_mu4_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig4_in = theta_sig4_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff4_in = coeff4_temp.reshape((x_shape[0] * x_shape[1], -1)) argsGMM = argsGMM + (theta_mu4_in, theta_sig4_in, coeff4_in) ddoutMSEA = ddoutMSEA + [mse4, mae4] ddoutYpreds = ddoutYpreds + [y_pred4_temp] #totaMSE+=mse4 indexSepDynamic += 2 totaMSE = (mse1 + mse2 + mse3 + mse4) / y_dim totaMSE.name = 'mse' totaMAE = (mae1 + mae2 + mae3 + mae4) / y_dim totaMAE.name = 'mae' recon = GMMdisagMulti( y_dim, y_in, theta_mu1_in, theta_sig1_in, coeff1_in, *argsGMM ) # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in) recon = recon.reshape((x_shape[0], x_shape[1])) recon.name = 'gmm_out' recon_term = recon.sum(axis=0).mean() recon_term = recon.sum(axis=0).mean() recon_term.name = 'recon_term' #kl_temp = kl_temp * mask kl_term = kl_temp.sum(axis=0).mean() kl_term.name = 'kl_term' #nll_upper_bound_0 = recon_term + kl_term #nll_upper_bound_0.name = 'nll_upper_bound_0' if (flgMSE == 1): nll_upper_bound = recon_term + kl_term + totaMSE else: nll_upper_bound = recon_term + kl_term nll_upper_bound.name = 'nll_upper_bound' ###################### model.inputs = [x, mask, y, y_mask, scheduleSamplingMask] model.params = params model.nodes = nodes optimizer = Adam(lr=lr) header = "epoch,log,kl,nll_upper_bound,mse,mae\n" extension = [ 
GradientClipping(batch_size=batch_size), EpochCount(epoch, save_path, header), Monitoring( freq=monitoring_freq, ddout=[ nll_upper_bound, recon_term, kl_term, totaMSE, totaMAE, mse1, mae1 ] + ddoutMSEA + ddoutYpreds, indexSep=indexSepDynamic, indexDDoutPlot=[13], # adding indexes of ddout for the plotting #, (6,y_pred_temp) instancesPlot=instancesPlot, #0-150 data=[Iterator(valid_data, batch_size)], savedFolder=save_path), Picklize(freq=monitoring_freq, path=save_path), EarlyStopping(freq=monitoring_freq, path=save_path, channel=channel_name), WeightNorm() ] mainloop = Training( name=pkl_name, data=Iterator(train_data, batch_size), model=model, optimizer=optimizer, cost=nll_upper_bound, outputs=[recon_term, kl_term, nll_upper_bound, totaMSE, totaMAE], n_steps=n_steps, extension=extension, lr_iterations=lr_iterations, k_speedOfconvergence=kSchedSamp) mainloop.run() ''' data=Iterator(test_data, batch_size) test_fn = theano.function(inputs=[x, y],#[x, y], #givens={x:Xtest}, #on_unused_input='ignore', #z=( ,200,1) allow_input_downcast=True, outputs=[prediction_val, recon_term_val, totaMSE_val, totaMAE_val, mse1_val,mse2_val,mse3_val,mse4_val, mae1_val,mae2_val,mae3_val,mae4_val, #unnormalized mae and mse 16 items# relErr1_val,relErr2_val,relErr3_val,relErr4_val, propAssigned1_val, propAssigned2_val,propAssigned3_val,propAssigned4_val], updates=updates_val ) testOutput = [] testMetrics2 = [] numBatchTest = 0 for batch in data: outputGeneration = test_fn(batch[0], batch[2]) testOutput.append(outputGeneration[1:12]) #before 36 including unnormalized metrics testMetrics2.append(outputGeneration[12:]) #{0:[4,20], 2:[5,10]} #if (numBatchTest==0): plt.figure(1) plt.plot(np.transpose(outputGeneration[0],[1,0,2])[4]) plt.savefig(save_path+"/vrnn_dis_generated{}_Pred_0-4".format(numBatchTest)) plt.clf() plt.figure(2) plt.plot(np.transpose(batch[2],[1,0,2])[4]) plt.savefig(save_path+"/vrnn_dis_generated{}_RealDisag_0-4".format(numBatchTest)) plt.clf() plt.figure(3) plt.plot(np.transpose(batch[0],[1,0,2])[4]) plt.savefig(save_path+"/vrnn_dis_generated{}_Realagg_0-4".format(numBatchTest)) plt.clf() numBatchTest+=1 testOutput = np.asarray(testOutput) testMetrics2 = np.asarray(testMetrics2) print(testOutput.shape) print(testMetrics2.shape) testOutput[:,19:] = 1000 * testOutput[:,19:] # kwtts a watts recon_test = testOutput[:, 0].mean() mse_test = testOutput[:, 1].mean() mae_test = testOutput[:, 2].mean() mse1_test = testOutput[:, 3].mean() mae1_test = testOutput[:, 7].mean() mse2_test = testOutput[:, 4].mean() mae2_test = testOutput[:, 8].mean() mse3_test = testOutput[:, 5].mean() mae3_test = testOutput[:, 9].mean() mse4_test = testOutput[:, 6].mean() mae4_test = testOutput[:, 10].mean() print(testOutput[:,3:11].mean(),testOutput[:,11:19].mean()) relErr1_test = testMetrics2[:,0].mean() relErr2_test = testMetrics2[:,1].mean() relErr3_test = testMetrics2[:,2].mean() relErr4_test = testMetrics2[:,3].mean() propAssigned1_test = testMetrics2[:, 8].mean() propAssigned2_test = testMetrics2[:, 9].mean() propAssigned3_test = testMetrics2[:, 10].mean() propAssigned4_test = testMetrics2[:, 11].mean() ''' fLog = open(save_path + '/output.csv', 'w') fLog.write(str(lr_iterations) + "\n") fLog.write(str(appliances) + "\n") fLog.write(str(windows) + "\n\n") fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n") fLog.write("{},{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim, y2s_dim, z2s_dim)) fLog.write("epoch,log,kl,mse1,mse2,mse3,mse4,mae1,mae2,mae3,mae4\n") for i, item in 
enumerate(mainloop.trainlog.monitor['nll_upper_bound']):
        # Columns for appliances beyond y_dim default to zero.
        e = f = g = n = p = q = 0
        ep = mainloop.trainlog.monitor['epoch'][i]
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        d = mainloop.trainlog.monitor['mse1'][i]
        m = mainloop.trainlog.monitor['mae1'][i]
        if (y_dim > 1):
            e = mainloop.trainlog.monitor['mse2'][i]
            n = mainloop.trainlog.monitor['mae2'][i]
        if (y_dim > 2):
            f = mainloop.trainlog.monitor['mse3'][i]
            p = mainloop.trainlog.monitor['mae3'][i]
        if (y_dim > 3):
            g = mainloop.trainlog.monitor['mse4'][i]
            q = mainloop.trainlog.monitor['mae4'][i]
        fLog.write(
            "{:d},{:.2f},{:.2f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n"
            .format(ep, a, b, d, e, f, g, m, n, p, q))
    fLog.close()
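# scheduleSamplingMask above feeds the scan a 0/1 flag per timestep: 1 keeps
# the ground-truth y_t, 0 feeds back the model's own prediction. A plausible
# way to build such a mask is the inverse-sigmoid decay of scheduled sampling,
# controlled by something like kSchedSamp (matching the k_speedOfconvergence
# argument passed to Training); the exact schedule the library uses is an
# assumption.
import numpy as np

def build_schedule_mask(n_steps, iteration, k_speed):
    # Probability of keeping ground truth decays from ~1 towards 0.
    p_true = k_speed / (k_speed + np.exp(iteration / float(k_speed)))
    return (np.random.rand(n_steps) < p_true).astype(np.float32)

mask_early = build_schedule_mask(n_steps=40, iteration=0, k_speed=100)    # mostly 1s
mask_late = build_schedule_mask(n_steps=40, iteration=2000, k_speed=100)  # mostly 0s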
def main(args): trial = int(args["trial"]) pkl_name = "vrnn_gauss_%d" % trial channel_name = "valid_nll_upper_bound" data_path = args["data_path"] save_path = args["save_path"] monitoring_freq = int(args["monitoring_freq"]) epoch = int(args["epoch"]) batch_size = int(args["batch_size"]) x_dim = int(args["x_dim"]) z_dim = int(args["z_dim"]) rnn_dim = int(args["rnn_dim"]) lr = float(args["lr"]) debug = int(args["debug"]) print "trial no. %d" % trial print "batch size %d" % batch_size print "learning rate %f" % lr print "saving pkl file '%s'" % pkl_name print "to the save path '%s'" % save_path q_z_dim = 150 p_z_dim = 150 p_x_dim = 250 x2s_dim = 250 z2s_dim = 150 target_dim = x_dim - 1 model = Model() train_data = IAMOnDB(name="train", prep="normalize", cond=False, path=data_path) X_mean = train_data.X_mean X_std = train_data.X_std valid_data = IAMOnDB(name="valid", prep="normalize", cond=False, path=data_path, X_mean=X_mean, X_std=X_std) init_W = InitCell("rand") init_U = InitCell("ortho") init_b = InitCell("zeros") init_b_sig = InitCell("const", mean=0.6) x, mask = train_data.theano_vars() if debug: x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32) temp = np.ones((15, batch_size), dtype=np.float32) temp[:, -2:] = 0.0 mask.tag.test_value = temp x_1 = FullyConnectedLayer( name="x_1", parent=["x_t"], parent_dim=[x_dim], nout=x2s_dim, unit="relu", init_W=init_W, init_b=init_b ) z_1 = FullyConnectedLayer( name="z_1", parent=["z_t"], parent_dim=[z_dim], nout=z2s_dim, unit="relu", init_W=init_W, init_b=init_b ) rnn = LSTM( name="rnn", parent=["x_1", "z_1"], parent_dim=[x2s_dim, z2s_dim], nout=rnn_dim, unit="tanh", init_W=init_W, init_U=init_U, init_b=init_b, ) phi_1 = FullyConnectedLayer( name="phi_1", parent=["x_1", "s_tm1"], parent_dim=[x2s_dim, rnn_dim], nout=q_z_dim, unit="relu", init_W=init_W, init_b=init_b, ) phi_mu = FullyConnectedLayer( name="phi_mu", parent=["phi_1"], parent_dim=[q_z_dim], nout=z_dim, unit="linear", init_W=init_W, init_b=init_b ) phi_sig = FullyConnectedLayer( name="phi_sig", parent=["phi_1"], parent_dim=[q_z_dim], nout=z_dim, unit="softplus", cons=1e-4, init_W=init_W, init_b=init_b_sig, ) prior_1 = FullyConnectedLayer( name="prior_1", parent=["s_tm1"], parent_dim=[rnn_dim], nout=p_z_dim, unit="relu", init_W=init_W, init_b=init_b ) prior_mu = FullyConnectedLayer( name="prior_mu", parent=["prior_1"], parent_dim=[p_z_dim], nout=z_dim, unit="linear", init_W=init_W, init_b=init_b, ) prior_sig = FullyConnectedLayer( name="prior_sig", parent=["prior_1"], parent_dim=[p_z_dim], nout=z_dim, unit="softplus", cons=1e-4, init_W=init_W, init_b=init_b_sig, ) theta_1 = FullyConnectedLayer( name="theta_1", parent=["z_1", "s_tm1"], parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim, unit="relu", init_W=init_W, init_b=init_b, ) theta_mu = FullyConnectedLayer( name="theta_mu", parent=["theta_1"], parent_dim=[p_x_dim], nout=target_dim, unit="linear", init_W=init_W, init_b=init_b, ) theta_sig = FullyConnectedLayer( name="theta_sig", parent=["theta_1"], parent_dim=[p_x_dim], nout=target_dim, unit="softplus", cons=1e-4, init_W=init_W, init_b=init_b_sig, ) corr = FullyConnectedLayer( name="corr", parent=["theta_1"], parent_dim=[p_x_dim], nout=1, unit="tanh", init_W=init_W, init_b=init_b ) binary = FullyConnectedLayer( name="binary", parent=["theta_1"], parent_dim=[p_x_dim], nout=1, unit="sigmoid", init_W=init_W, init_b=init_b ) nodes = [ rnn, x_1, z_1, phi_1, phi_mu, phi_sig, prior_1, prior_mu, prior_sig, theta_1, theta_mu, theta_sig, corr, binary, ] params = OrderedDict() for node 
in nodes: if node.initialize() is not None: params.update(node.initialize()) params = init_tparams(params) s_0 = rnn.get_init_state(batch_size) x_1_temp = x_1.fprop([x], params) def inner_fn(x_t, s_tm1): phi_1_t = phi_1.fprop([x_t, s_tm1], params) phi_mu_t = phi_mu.fprop([phi_1_t], params) phi_sig_t = phi_sig.fprop([phi_1_t], params) prior_1_t = prior_1.fprop([s_tm1], params) prior_mu_t = prior_mu.fprop([prior_1_t], params) prior_sig_t = prior_sig.fprop([prior_1_t], params) z_t = Gaussian_sample(phi_mu_t, phi_sig_t) z_1_t = z_1.fprop([z_t], params) s_t = rnn.fprop([[x_t, z_1_t], [s_tm1]], params) return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_1_t ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp, z_1_temp), updates) = theano.scan( fn=inner_fn, sequences=[x_1_temp], outputs_info=[s_0, None, None, None, None, None] ) for k, v in updates.iteritems(): k.default_update = v s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0) theta_1_temp = theta_1.fprop([z_1_temp, s_temp], params) theta_mu_temp = theta_mu.fprop([theta_1_temp], params) theta_sig_temp = theta_sig.fprop([theta_1_temp], params) corr_temp = corr.fprop([theta_1_temp], params) binary_temp = binary.fprop([theta_1_temp], params) kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp) x_shape = x.shape x_in = x.reshape((x_shape[0] * x_shape[1], -1)) theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1)) corr_in = corr_temp.reshape((x_shape[0] * x_shape[1], -1)) binary_in = binary_temp.reshape((x_shape[0] * x_shape[1], -1)) recon = BiGauss(x_in, theta_mu_in, theta_sig_in, corr_in, binary_in) recon = recon.reshape((x_shape[0], x_shape[1])) recon = recon * mask recon_term = recon.sum(axis=0).mean() recon_term.name = "recon_term" kl_temp = kl_temp * mask kl_term = kl_temp.sum(axis=0).mean() kl_term.name = "kl_term" nll_upper_bound = recon_term + kl_term nll_upper_bound.name = "nll_upper_bound" max_x = x.max() mean_x = x.mean() min_x = x.min() max_x.name = "max_x" mean_x.name = "mean_x" min_x.name = "min_x" max_theta_mu = theta_mu_in.max() mean_theta_mu = theta_mu_in.mean() min_theta_mu = theta_mu_in.min() max_theta_mu.name = "max_theta_mu" mean_theta_mu.name = "mean_theta_mu" min_theta_mu.name = "min_theta_mu" max_theta_sig = theta_sig_in.max() mean_theta_sig = theta_sig_in.mean() min_theta_sig = theta_sig_in.min() max_theta_sig.name = "max_theta_sig" mean_theta_sig.name = "mean_theta_sig" min_theta_sig.name = "min_theta_sig" max_phi_sig = phi_sig_temp.max() mean_phi_sig = phi_sig_temp.mean() min_phi_sig = phi_sig_temp.min() max_phi_sig.name = "max_phi_sig" mean_phi_sig.name = "mean_phi_sig" min_phi_sig.name = "min_phi_sig" max_prior_sig = prior_sig_temp.max() mean_prior_sig = prior_sig_temp.mean() min_prior_sig = prior_sig_temp.min() max_prior_sig.name = "max_prior_sig" mean_prior_sig.name = "mean_prior_sig" min_prior_sig.name = "min_prior_sig" model.inputs = [x, mask] model.params = params model.nodes = nodes optimizer = Adam(lr=lr) extension = [ GradientClipping(batch_size=batch_size), EpochCount(epoch), Monitoring( freq=monitoring_freq, ddout=[ nll_upper_bound, recon_term, kl_term, max_phi_sig, mean_phi_sig, min_phi_sig, max_prior_sig, mean_prior_sig, min_prior_sig, max_theta_sig, mean_theta_sig, min_theta_sig, max_x, mean_x, min_x, max_theta_mu, mean_theta_mu, min_theta_mu, ], data=[Iterator(valid_data, batch_size)], ), Picklize(freq=monitoring_freq, path=save_path), 
        EarlyStopping(freq=monitoring_freq, path=save_path,
                      channel=channel_name),
        WeightNorm(),
    ]

    mainloop = Training(
        name=pkl_name,
        data=Iterator(train_data, batch_size),
        model=model,
        optimizer=optimizer,
        cost=nll_upper_bound,
        outputs=[nll_upper_bound],
        extension=extension,
    )
    mainloop.run()
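# KLGaussianGaussian above is the closed-form KL divergence between the
# diagonal posterior N(phi_mu, phi_sig^2) and the prior N(prior_mu,
# prior_sig^2). A NumPy sketch of that formula, summed over the latent
# dimension (the library version operates on Theano tensors):
import numpy as np

def kl_gaussian_gaussian(mu_q, sig_q, mu_p, sig_p):
    # KL(q || p) for diagonal Gaussians, elementwise then summed over z_dim.
    kl = (np.log(sig_p) - np.log(sig_q)
          + (sig_q ** 2 + (mu_q - mu_p) ** 2) / (2.0 * sig_p ** 2) - 0.5)
    return kl.sum(axis=-1)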
mse = MSE(y_in, y_hat_in)
mse = mse.mean()
mse.name = 'mse'

model.inputs = [x, y]
model._params = params
model.nodes = nodes

optimizer = Adam(lr=0.001)

extension = [
    GradientClipping(batch_size=batch_size),
    EpochCount(100),
    Monitoring(freq=100,
               ddout=[mse],
               data=[Iterator(valid_data, batch_size),
                     Iterator(train_data, batch_size)]),
    Picklize(freq=200, path=save_path)
]

mainloop = Training(name='toy_bb_lstm',
                    data=Iterator(train_data, batch_size),
                    model=model,
                    optimizer=optimizer,
                    cost=mse,
                    outputs=[mse],
                    extension=extension)
mainloop.run()
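# Several scripts in this file multiply per-timestep costs by `mask` before
# reducing (recon * mask, kl_temp * mask). A NumPy sketch of why: padded
# timesteps must not contribute to the per-sequence cost. Shapes follow the
# (time, batch) layout used above.
import numpy as np

cost = np.ones((15, 4))            # (time, batch) per-step cost
mask = np.ones((15, 4), dtype=np.float32)
mask[-2:, :] = 0.0                 # last two steps are padding

masked = cost * mask               # zero out padded steps
per_seq = masked.sum(axis=0)       # sum over time, as recon.sum(axis=0)
batch_cost = per_seq.mean()        # then mean over the batch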
def main(args): trial = int(args['trial']) pkl_name = 'rnn_gauss_%d' % trial channel_name = 'valid_nll' data_path = args['data_path'] save_path = args['save_path'] flgMSE = int(args['flgMSE']) monitoring_freq = int(args['monitoring_freq']) epoch = int(args['epoch']) batch_size = int(args['batch_size']) x_dim = int(args['x_dim']) z_dim = int(args['z_dim']) y_dim = int(args['y_dim']) flgAgg = int(args['flgAgg']) rnn_dim = int(args['rnn_dim']) lr = float(args['lr']) debug = int(args['debug']) print "trial no. %d" % trial print "batch size %d" % batch_size print "learning rate %f" % lr print "saving pkl file '%s'" % pkl_name print "to the save path '%s'" % save_path x2s_dim = 340 s2x_dim = 340 target_dim = k #x_dim - 1 model = Model() train_data = UKdale(name='train', prep='normalize', cond=False, path=data_path, windows=windows, appliances=appliances, numApps=flgAgg, period=period, n_steps=n_steps, stride_train=stride_train, stride_test=stride_test) X_mean = train_data.X_mean X_std = train_data.X_std valid_data = UKdale(name='valid', prep='normalize', cond=False, path=data_path, X_mean=X_mean, X_std=X_std, windows=windows, appliances=appliances, numApps=flgAgg, period=period, n_steps=n_steps, stride_train=stride_train, stride_test=stride_test) init_W = InitCell('rand') init_U = InitCell('ortho') init_b = InitCell('zeros') init_b_sig = InitCell('const', mean=0.6) x, y = train_data.theano_vars() if debug: x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32) temp = np.ones((15, batch_size), dtype=np.float32) temp[:, -2:] = 0. mask.tag.test_value = temp x_1 = FullyConnectedLayer(name='x_1', parent=['x_t'], parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b) rnn = LSTM(name='rnn', parent=['x_1'], parent_dim=[x2s_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b) theta_1 = FullyConnectedLayer(name='theta_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=s2x_dim, unit='relu', init_W=init_W, init_b=init_b) theta_mu = FullyConnectedLayer(name='theta_mu', parent=['theta_1'], parent_dim=[s2x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_sig = FullyConnectedLayer(name='theta_sig', parent=['theta_1'], parent_dim=[s2x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) corr = FullyConnectedLayer(name='corr', parent=['theta_1'], parent_dim=[s2x_dim], nout=1, unit='tanh', init_W=init_W, init_b=init_b) binary = FullyConnectedLayer(name='binary', parent=['theta_1'], parent_dim=[s2x_dim], nout=1, unit='sigmoid', init_W=init_W, init_b=init_b) nodes = [rnn, x_1, theta_1, theta_mu, theta_sig] #, corr, binary params = OrderedDict() for node in nodes: if node.initialize() is not None: params.update(node.initialize()) params = init_tparams(params) s_0 = rnn.get_init_state(batch_size) x_1_temp = x_1.fprop([x], params) def inner_fn(x_t, s_tm1): s_t = rnn.fprop([[x_t], [s_tm1]], params) theta_1_t = theta_1.fprop([s_t], params) theta_mu_t = theta_mu.fprop([theta_1_t], params) theta_sig_t = theta_sig.fprop([theta_1_t], params) coeff_t = coeff.fprop([theta_1_t], params) pred = Gaussian_sample(theta_mu_t, theta_sig_t) return s_t, theta_mu_t, theta_sig_t, coeff_t, pred ((s_temp, theta_mu_temp, theta_sig_temp, coeff_temp, pred_temp), updates) = theano.scan(fn=inner_fn, sequences=[x_1_temp], outputs_info=[s_0, None, None, None, None]) for k, v in updates.iteritems(): k.default_update = v s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0) ''' theta_1_temp = theta_1.fprop([s_temp], params) 
theta_mu_temp = theta_mu.fprop([theta_1_temp], params)
    theta_sig_temp = theta_sig.fprop([theta_1_temp], params)
    corr_temp = corr.fprop([theta_1_temp], params)
    binary_temp = binary.fprop([theta_1_temp], params)
    '''

    x_shape = x.shape
    x_in = x.reshape((x_shape[0] * x_shape[1], -1))
    theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1))
    # corr/binary are excluded from `nodes` and unused by the Gaussian
    # recon term below, so their reshapes stay disabled as well.
    #corr_in = corr_temp.reshape((x_shape[0] * x_shape[1], -1))
    #binary_in = binary_temp.reshape((x_shape[0] * x_shape[1], -1))

    if (flgAgg == -1):
        prediction = pred_temp
        prediction.name = 'x_reconstructed'
        mse = T.mean((prediction - x)**2)  # CHECK RESHAPE with an assertion
        mae = T.mean(T.abs_(prediction - x))
        mse.name = 'mse'
        mae.name = 'mae'
        pred_in = x.reshape((x_shape[0] * x_shape[1], -1))
    else:
        pred_temp = pred_temp.reshape((pred_temp.shape[0], pred_temp.shape[1]))
        pred_temp.name = 'pred_' + str(flgAgg)
        #y[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1))
        mse = T.mean((pred_temp - y.T)**2)  # CHECK RESHAPE with an assertion
        mae = T.mean(T.abs_(pred_temp - y.T))
        mse.name = 'mse'
        mae.name = 'mae'
        pred_in = y.reshape((x.shape[0] * x.shape[1], -1), ndim=2)

    recon = Gaussian(pred_in, theta_mu_in, theta_sig_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    #recon = recon * mask
    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'nll'

    max_x = x.max()
    mean_x = x.mean()
    min_x = x.min()
    max_x.name = 'max_x'
    mean_x.name = 'mean_x'
    min_x.name = 'min_x'
    max_theta_mu = theta_mu_in.max()
    mean_theta_mu = theta_mu_in.mean()
    min_theta_mu = theta_mu_in.min()
    max_theta_mu.name = 'max_theta_mu'
    mean_theta_mu.name = 'mean_theta_mu'
    min_theta_mu.name = 'min_theta_mu'
    max_theta_sig = theta_sig_in.max()
    mean_theta_sig = theta_sig_in.mean()
    min_theta_sig = theta_sig_in.min()
    max_theta_sig.name = 'max_theta_sig'
    mean_theta_sig.name = 'mean_theta_sig'
    min_theta_sig.name = 'min_theta_sig'

    model.inputs = [x, y]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch),
        # mse and mae are monitored as well so the CSV loop below can read them.
        Monitoring(freq=monitoring_freq,
                   ddout=[
                       recon_term, mse, mae,
                       max_theta_sig, mean_theta_sig, min_theta_sig,
                       max_x, mean_x, min_x,
                       max_theta_mu, mean_theta_mu, min_theta_mu
                   ],
                   data=[Iterator(valid_data, batch_size)]),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq, path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=recon_term,
                        outputs=[recon_term],
                        extension=extension)
    mainloop.run()

    fLog = open(save_path + '/output.csv', 'w')
    fLog.write("log,mse,mae\n")
    for i, item in enumerate(mainloop.trainlog.monitor['nll']):
        a = mainloop.trainlog.monitor['nll'][i]
        d = mainloop.trainlog.monitor['mse'][i]
        e = mainloop.trainlog.monitor['mae'][i]
        fLog.write("{},{},{}\n".format(a, d, e))
    fLog.close()
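# The recon term above comes from Gaussian(pred_in, theta_mu_in, theta_sig_in).
# Assuming it returns the per-sample negative log-density of a diagonal
# Gaussian (so that summing over time and averaging over the batch yields the
# 'nll' channel), a NumPy sketch:
import numpy as np

def gaussian_nll(y, mu, sig):
    # y, mu, sig: (n, d); returns the (n,) negative log-likelihood.
    return (0.5 * ((y - mu) / sig) ** 2
            + np.log(sig) + 0.5 * np.log(2 * np.pi)).sum(axis=1)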
nodes = [h1, h2, h3, h4, cost]
rnn = Net(inputs=inputs, inputs_dim=inputs_dim, nodes=nodes)
cost = unpack(rnn.build_recurrent_graph(output_args=[cost]))
cost = cost.mean()
cost.name = 'cost'
model.graphs = [rnn]

optimizer = Adam(lr=0.001)

extension = [
    GradientClipping(batch_size=batch_size),
    EpochCount(100),
    Monitoring(freq=100, ddout=[cost]),
    Picklize(freq=200, path=save_path)
]

mainloop = Training(name='toy_bb_gflstm',
                    data=Iterator(trdata, batch_size),
                    model=model,
                    optimizer=optimizer,
                    cost=cost,
                    outputs=[cost],
                    extension=extension)
mainloop.run()
def main(args): theano.optimizer = 'fast_compile' theano.config.exception_verbosity = 'high' trial = int(args['trial']) pkl_name = 'vrnn_gauss_%d' % trial channel_name = 'valid_nll_upper_bound' data_path = args['data_path'] save_path = args['save_path'] save_path = args['save_path'] period = int(args['period']) n_steps = int(args['n_steps']) stride_train = int(args['stride_train']) stride_test = int(args['stride_test']) monitoring_freq = int(args['monitoring_freq']) epoch = int(args['epoch']) batch_size = int(args['batch_size']) x_dim = int(args['x_dim']) z_dim = int(args['z_dim']) rnn_dim = int(args['rnn_dim']) lr = float(args['lr']) debug = int(args['debug']) print "trial no. %d" % trial print "batch size %d" % batch_size print "learning rate %f" % lr print "saving pkl file '%s'" % pkl_name print "to the save path '%s'" % save_path q_z_dim = 150 p_z_dim = 150 p_x_dim = 250 x2s_dim = 10 #250 z2s_dim = 10 #150 target_dim = x_dim #(x_dim-1) model = Model() Xtrain, ytrain, Xval, yval = fetch_ukdale(data_path, windows, appliances, numApps=flgAgg, period=period, n_steps=n_steps, stride_train=stride_train, stride_test=stride_test) train_data = UKdale( name='train', prep='normalize', cond=True, # False #path=data_path, inputX=Xtrain, labels=ytrain) X_mean = train_data.X_mean X_std = train_data.X_std valid_data = UKdale( name='valid', prep='normalize', cond=True, # False #path=data_path, X_mean=X_mean, X_std=X_std, inputX=Xval, labels=yval) init_W = InitCell('rand') init_U = InitCell('ortho') init_b = InitCell('zeros') init_b_sig = InitCell('const', mean=0.6) x, y = train_data.theano_vars() if debug: x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32) temp = np.ones((15, batch_size), dtype=np.float32) temp[:, -2:] = 0. mask.tag.test_value = temp x_1 = FullyConnectedLayer( name='x_1', parent=['x_t'], #OrderDict parent['x_t'] = x_dim parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b) z_1 = FullyConnectedLayer(name='z_1', parent=['z_t'], parent_dim=[z_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b) rnn = LSTM(name='rnn', parent=['x_1', 'z_1'], parent_dim=[x2s_dim, z2s_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b) phi_1 = FullyConnectedLayer( name='phi_1', ## encoder parent=['x_1', 's_tm1'], parent_dim=[x2s_dim, rnn_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b) phi_mu = FullyConnectedLayer(name='phi_mu', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) phi_sig = FullyConnectedLayer(name='phi_sig', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) prior_1 = FullyConnectedLayer(name='prior_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b) prior_mu = FullyConnectedLayer(name='prior_mu', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) prior_sig = FullyConnectedLayer(name='prior_sig', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_1 = FullyConnectedLayer( name='theta_1', ### decoder parent=['z_1', 's_tm1'], parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b) theta_mu = FullyConnectedLayer(name='theta_mu', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_sig = FullyConnectedLayer(name='theta_sig', parent=['theta_1'], 
    # decoder
    theta_1 = FullyConnectedLayer(name='theta_1', parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim,
                                  unit='relu', init_W=init_W, init_b=init_b)
    theta_mu = FullyConnectedLayer(name='theta_mu', parent=['theta_1'],
                                   parent_dim=[p_x_dim], nout=target_dim,
                                   unit='linear', init_W=init_W, init_b=init_b)
    theta_sig = FullyConnectedLayer(name='theta_sig', parent=['theta_1'],
                                    parent_dim=[p_x_dim], nout=target_dim,
                                    unit='softplus', cons=1e-4,
                                    init_W=init_W, init_b=init_b_sig)
    # rho
    corr = FullyConnectedLayer(name='corr', parent=['theta_1'],
                               parent_dim=[p_x_dim], nout=1, unit='tanh',
                               init_W=init_W, init_b=init_b)
    binary = FullyConnectedLayer(name='binary', parent=['theta_1'],
                                 parent_dim=[p_x_dim], nout=1, unit='sigmoid',
                                 init_W=init_W, init_b=init_b)

    nodes = [rnn,
             x_1, z_1,
             phi_1, phi_mu, phi_sig,
             prior_1, prior_mu, prior_sig,
             theta_1, theta_mu, theta_sig]  # , corr, binary

    # initialize the W matrices according to the dimensions of each node's parents
    params = OrderedDict()
    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())
    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)
    x_1_temp = x_1.fprop([x], params)

    def inner_fn(x_t, s_tm1):
        phi_1_t = phi_1.fprop([x_t, s_tm1], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        # sample z_t ~ q(z_t | x_t, s_tm1)
        z_t = Gaussian_sample(phi_mu_t, phi_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)
        pred_t = Gaussian_sample(theta_mu_t, theta_sig_t)

        s_t = rnn.fprop([[x_t, z_1_t], [s_tm1]], params)

        return (s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t,
                z_t, z_1_t, theta_1_t, theta_mu_t, theta_sig_t, pred_t)

    # scan loops over the tensor(s) given as `sequences`; outputs_info provides
    # the initial value of each recurrent output, and None means that output is
    # not fed back into inner_fn. The order of inner_fn's arguments is:
    # sequences (if any), prior result(s) (if needed), non-sequences (if any).
    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,
      z_temp, z_1_temp, theta_1_temp, theta_mu_temp, theta_sig_temp,
      pred_temp), updates) = theano.scan(
        fn=inner_fn,
        sequences=[x_1_temp],
        outputs_info=[s_0, None, None, None, None,
                      None, None, None, None, None, None])

    for k, v in updates.iteritems():
        k.default_update = v

    # shift the states so step t sees s_{t-1}
    s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)
    s_temp.name = 'h_1'
    z_temp.name = 'z'
    z_1_temp.name = 'z_1'
    theta_mu_temp.name = 'theta_mu'
    theta_sig_temp.name = 'theta_sig'
    pred_temp.name = 'x_reconstructed'
    # alternative: recompute the decoder on the shifted states
    # theta_1_temp = theta_1.fprop([z_1_temp, s_temp], params)
    # theta_mu_temp = theta_mu.fprop([theta_1_temp], params)
    # theta_sig_temp = theta_sig.fprop([theta_1_temp], params)
    # corr_temp = corr.fprop([theta_1_temp], params)
    # binary_temp = binary.fprop([theta_1_temp], params)

    x_shape = x.shape

    if flgAgg == -1:
        pred_temp.name = 'x_reconstructed'
        mse = T.mean((pred_temp - x) ** 2)      # CHECK RESHAPE with an assertion
        mae = T.mean(T.abs_(pred_temp - x))
        mse.name = 'mse'
        mae.name = 'mae'
        pred_in = x.reshape((x_shape[0] * x_shape[1], -1))
    else:
        pred_temp.name = 'pred_' + str(flgAgg)
        target = y[:, :, flgAgg].reshape((y.shape[0], y.shape[1], 1))
        mse = T.mean((pred_temp - target) ** 2)  # CHECK RESHAPE with an assertion
        mae = T.mean(T.abs_(pred_temp - target))
        mse.name = 'mse'
        mae.name = 'mae'
        pred_in = y[:, :, flgAgg].reshape((x_shape[0] * x_shape[1], -1), ndim=2)

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp,
                                 prior_mu_temp, prior_sig_temp)
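# KLGaussianGaussian above is the closed-form KL divergence between two
# diagonal Gaussians, KL(q || p) with q = N(phi_mu, phi_sig^2) and
# p = N(prior_mu, prior_sig^2). A minimal numpy sketch of that formula, summed
# over the latent dimension; the function name is illustrative and need not
# match cle's exact signature.
import numpy as np

def kl_gaussian_gaussian(mu_q, sig_q, mu_p, sig_p):
    """KL(N(mu_q, sig_q^2) || N(mu_p, sig_p^2)), summed over the last axis."""
    kl = (np.log(sig_p) - np.log(sig_q)
          + (sig_q ** 2 + (mu_q - mu_p) ** 2) / (2. * sig_p ** 2)
          - 0.5)
    return kl.sum(axis=-1)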
    theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1))
    # corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1))
    # binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1))

    # second term of the loss: Gaussian negative log-likelihood of the targets
    # under the decoder; the bivariate variant would use corr_in and binary_in:
    # recon = BiGauss(x_in, theta_mu_in, theta_sig_in, corr_in, binary_in)
    recon = Gaussian(pred_in, theta_mu_in, theta_sig_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    # recon = recon * mask
    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    # kl_temp = kl_temp * mask
    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    nll_upper_bound = recon_term + kl_term
    nll_upper_bound.name = 'nll_upper_bound'

    max_x = x.max()
    mean_x = x.mean()
    min_x = x.min()
    max_x.name = 'max_x'
    mean_x.name = 'mean_x'
    min_x.name = 'min_x'

    max_theta_mu = theta_mu_in.max()
    mean_theta_mu = theta_mu_in.mean()
    min_theta_mu = theta_mu_in.min()
    max_theta_mu.name = 'max_theta_mu'
    mean_theta_mu.name = 'mean_theta_mu'
    min_theta_mu.name = 'min_theta_mu'

    max_theta_sig = theta_sig_in.max()
    mean_theta_sig = theta_sig_in.mean()
    min_theta_sig = theta_sig_in.min()
    max_theta_sig.name = 'max_theta_sig'
    mean_theta_sig.name = 'mean_theta_sig'
    min_theta_sig.name = 'min_theta_sig'

    max_phi_sig = phi_sig_temp.max()
    mean_phi_sig = phi_sig_temp.mean()
    min_phi_sig = phi_sig_temp.min()
    max_phi_sig.name = 'max_phi_sig'
    mean_phi_sig.name = 'mean_phi_sig'
    min_phi_sig.name = 'min_phi_sig'

    max_prior_sig = prior_sig_temp.max()
    mean_prior_sig = prior_sig_temp.mean()
    min_prior_sig = prior_sig_temp.min()
    max_prior_sig.name = 'max_prior_sig'
    mean_prior_sig.name = 'mean_prior_sig'
    min_prior_sig.name = 'min_prior_sig'

    prior_sig_output = prior_sig_temp
    prior_sig_output.name = 'prior_sig_o'
    phi_sig_output = phi_sig_temp
    phi_sig_output.name = 'phi_sig_o'

    model.inputs = [x, mask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch),
        Monitoring(freq=monitoring_freq,
                   # added in order to explore the distributions
                   ddout=[nll_upper_bound, recon_term, kl_term, mse, mae,
                          max_phi_sig, mean_phi_sig, min_phi_sig,
                          max_prior_sig, mean_prior_sig, min_prior_sig,
                          max_theta_sig, mean_theta_sig, min_theta_sig,
                          max_x, mean_x, min_x,
                          max_theta_mu, mean_theta_mu, min_theta_mu,  # 0-19
                          s_temp, z_temp, z_1_temp, pred_temp],       # 20-23
                   indexSep=22,
                   indexDDoutPlot=[(0, theta_mu_temp), (2, z_temp),
                                   (3, pred_temp)],
                   instancesPlot=[0, 150],
                   savedFolder=save_path,
                   data=[Iterator(valid_data, batch_size)]),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq, path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=nll_upper_bound,
                        outputs=[nll_upper_bound],
                        extension=extension)
    mainloop.run()

    fLog = open(save_path + '/output.csv', 'w')
    fLog.write("recon_term,kl_term,nll_upper_bound,mse,mae\n")
    for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']):
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        c = mainloop.trainlog.monitor['nll_upper_bound'][i]
        d = mainloop.trainlog.monitor['mse'][i]
        e = mainloop.trainlog.monitor['mae'][i]
        fLog.write("{},{},{},{},{}\n".format(a, b, c, d, e))
    fLog.close()
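# A small usage sketch: the monitoring channels written to output.csv above can
# be loaded back for inspection after training. Pure standard library; the path
# is the same save_path + '/output.csv' used by main().
import csv

def load_monitor_csv(path):
    """Return {column_name: [float, ...]} from the training-log CSV."""
    with open(path) as f:
        reader = csv.DictReader(f)
        rows = list(reader)
        names = reader.fieldnames
    return {name: [float(r[name]) for r in rows] for name in names}

# usage:
# log = load_monitor_csv(save_path + '/output.csv')
# print(min(log['nll_upper_bound']))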