def _define_rate(self, Input=None):
    """
    """
    if Input is None:
        Input = self.X
    Nsamps = tf.shape(Input)[0]
    NTbins = tf.shape(Input)[1]
    xDim = self.xDim
    yDim = self.yDim
    Input = tf.reshape(Input, [Nsamps*NTbins, xDim], name='X_input')

    rangeY = self.params.initrange_outY
    self.inv_tau = inv_tau = 0.3
    obs_nodes = 64
    fully_connected_layer = FullLayer()
    with tf.variable_scope("obs_nn", reuse=tf.AUTO_REUSE):
        full1 = fully_connected_layer(Input, obs_nodes, 'softplus', 'full1')
        full2 = fully_connected_layer(full1, obs_nodes, 'softplus', 'full2')
        if self.is_out_positive:
            rate_NTxD = fully_connected_layer(
                full2, yDim, 'softplus', 'output',
                b_initializer=tf.random_normal_initializer(1.0, rangeY))
        else:
            full3 = fully_connected_layer(
                full2, yDim, 'linear', 'output',
                initializer=tf.random_uniform_initializer(-rangeY, rangeY))
            rate_NTxD = tf.exp(inv_tau*full3)

    return rate_NTxD


def add_a_random_full_layer(self):
    mean = self.init_mean()
    std = self.init_std()
    hidden_neuron_num = self.init_hidden_neuron_size()
    full_layer = FullLayer(hidden_neuron_num=hidden_neuron_num,
                           weight_matrix=[mean, std])
    return full_layer


def _define_mean_variance(self, Input=None):
    """
    """
    if Input is None:
        Input = self.X
    xDim = self.xDim
    yDim = self.yDim
    Nsamps = tf.shape(Input)[0]
    NTbins = tf.shape(Input)[1]
    Input = tf.reshape(Input, [Nsamps*NTbins, xDim], name='X_input')

    rangeY = self.params.initrange_Goutmean
    initSigma = self.params.initrange_Goutvar
    init_b = self.params.initbias_Goutmean
    obs_nodes = 64
    fully_connected_layer = FullLayer()
    with tf.variable_scope("obs_nn_mean", reuse=tf.AUTO_REUSE):
        full1 = fully_connected_layer(
            Input, obs_nodes, 'softplus', 'full1',
            initializer=tf.random_normal_initializer(stddev=0.5))
        full2 = fully_connected_layer(
            full1, obs_nodes, 'softplus', 'full2',
            initializer=tf.random_normal_initializer(stddev=0.5))
        MuY_NTxD = fully_connected_layer(
            full2, yDim, 'linear', 'output',
            initializer=tf.random_uniform_initializer(-rangeY, rangeY),
            b_initializer=tf.random_normal_initializer(init_b))
    MuY_NxTxD = tf.reshape(MuY_NTxD, [Nsamps, NTbins, yDim])

    with tf.variable_scope("obs_var", reuse=tf.AUTO_REUSE):
        SigmaInvChol_DxD = tf.get_variable(
            'SigmaInvChol',
            initializer=tf.cast(initSigma*tf.eye(yDim), DTYPE))
    self.SigmaChol_DxD = tf.reshape(tf.matrix_inverse(SigmaInvChol_DxD),
                                    [1, 1, yDim, yDim])  # Needed only for sampling
    SigmaInv_DxD = tf.matmul(SigmaInvChol_DxD, SigmaInvChol_DxD,
                             transpose_b=True)

    return MuY_NxTxD, SigmaInv_DxD


def poissonEncoding(Y, X, yDim, xDim, learning_rate):
    DTYPE = tf.float32
    #yDim = tf.shape(Y)[1];
    #xDim = tf.shape(X)[1];
    fullyConnectedLayer = FullLayer()
    rangeRate1 = 1 / tf.sqrt(tf.cast(xDim, DTYPE))
    with tf.variable_scope("poi_rate_nn", reuse=tf.AUTO_REUSE):
        full = fullyConnectedLayer(X, yDim, nl='linear', scope='output',
                                   initializer=tf.random_uniform_initializer(
                                       minval=-rangeRate1, maxval=rangeRate1))
        rate = tf.exp(full)

    entropy_loss = tf.reduce_sum(Y * tf.log(rate) - rate)  #-tf.reduce_sum((Y-rate)**2)#
    with tf.variable_scope("poi_adam", reuse=tf.AUTO_REUSE):
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(-entropy_loss)

    return optimizer, entropy_loss, rate


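# Minimal usage sketch for poissonEncoding above (added for illustration, not
# part of the original sources). It assumes TF1-style placeholders, synthetic
# numpy data, and that FullLayer is importable in the defining module. The
# maximized sum(Y*log(rate) - rate) is the Poisson log-likelihood up to the
# constant -sum(log(Y!)), which does not depend on the parameters.
import numpy as np
import tensorflow as tf

xDim, yDim, N = 2, 5, 500
W_true = np.random.randn(xDim, yDim)
X_np = np.random.randn(N, xDim).astype(np.float32)
Y_np = np.random.poisson(lam=np.exp(X_np @ W_true)).astype(np.float32)

X = tf.placeholder(tf.float32, [None, xDim], name='X')
Y = tf.placeholder(tf.float32, [None, yDim], name='Y')
train_op, loglik, rate = poissonEncoding(Y, X, yDim, xDim, learning_rate=1e-3)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(200):
        _, ll = sess.run([train_op, loglik], feed_dict={X: X_np, Y: Y_np})

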
def _define_rate(self, Input=None):
    """
    Define the generative map for the rate of Poisson observations Y = f(X).
    The map is defined differently depending on params.is_out_positive.

    params.is_out_positive == True -> Y = NN(X) where the last layer is a
        softplus.
    params.is_out_positive == False -> Y = exp{NN(X)/tau} where the last
        layer of the NN is a linear layer.
    """
    params = self.params
    if Input is None:
        Input = self.X
    Nsamps = tf.shape(Input)[0]
    NTbins = tf.shape(Input)[1]
    xDim = self.xDim
    yDim = self.yDim
    Input = tf.reshape(Input, [Nsamps * NTbins, xDim], name='X_input')

    rangeY = params.initrange_outY
    self.inv_tau = inv_tau = params.inv_tau
    obs_nodes = 64
    fully_connected_layer = FullLayer()
    with tf.variable_scope("obs_nn", reuse=tf.AUTO_REUSE):
        if params.is_linear_output:
            full = fully_connected_layer(Input, yDim, 'linear',
                                         scope='output')
            rate_NTxD = tf.exp(inv_tau * full)
        else:
            full1 = fully_connected_layer(Input, obs_nodes, 'softplus',
                                          'full1')
            full2 = fully_connected_layer(full1, obs_nodes, 'softplus',
                                          'full2')
            if params.is_out_positive:
                rate_NTxD = fully_connected_layer(
                    full2, yDim, 'softplus', scope='output',
                    b_initializer=tf.random_normal_initializer(1.0, rangeY))
            else:
                full3 = fully_connected_layer(full2, yDim, 'linear',
                                              scope='output')
                # initializer=tf.random_uniform_initializer(-rangeY, rangeY))
                rate_NTxD = tf.exp(inv_tau * full3)
    self.rate_NxTxD = tf.reshape(rate_NTxD, [Nsamps, NTbins, yDim],
                                 name='outY')

    return rate_NTxD


def gammaEncoding(Y, X, yDim, xDim, gen_nodes, learning_rate):
    DTYPE = tf.float32
    #yDim = tf.shape(Y)[1];
    #xDim = tf.shape(X)[1];
    fullyConnectedLayer = FullLayer()
    rangeRate1 = 1 / tf.sqrt(tf.cast(xDim, DTYPE))
    rangeRate2 = 1 / tf.sqrt(tf.cast(gen_nodes, DTYPE))
    with tf.variable_scope("gamma_rate_nn", reuse=tf.AUTO_REUSE):
        full1 = fullyConnectedLayer(X, gen_nodes, nl='tanh', scope='full1',
                                    initializer=tf.random_uniform_initializer(
                                        minval=-rangeRate1, maxval=rangeRate1))
        full2 = fullyConnectedLayer(full1, gen_nodes, nl='tanh', scope='full2',
                                    initializer=tf.random_uniform_initializer(
                                        minval=-rangeRate2, maxval=rangeRate2))
        full_theta = fullyConnectedLayer(
            full2, yDim, nl='linear', scope='output_theta',
            initializer=tf.random_uniform_initializer(minval=-rangeRate2,
                                                      maxval=rangeRate2))
        full_k = fullyConnectedLayer(
            full2, yDim, nl='linear', scope='output_k',
            initializer=tf.random_uniform_initializer(minval=-rangeRate2,
                                                      maxval=rangeRate2))
        theta = tf.exp(full_theta) + 1e-6
        k = tf.exp(full_k) + 1e-7

    # now compute the entropy loss
    LY1 = tf.reduce_sum(-k * tf.log(theta) - Y / theta)
    LY2 = tf.reduce_sum((k - 1) * tf.log(Y))
    LY3 = tf.reduce_sum(-tf.lgamma(k))
    entropy_loss = LY1 + LY2 + LY3

    with tf.variable_scope("gamma_adam", reuse=tf.AUTO_REUSE):
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(-entropy_loss)
    rate = theta * k

    return optimizer, entropy_loss, theta, k, rate


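# Note (added, not in the original source): LY1 + LY2 + LY3 in gammaEncoding
# is the shape/scale parametrization of the Gamma log-likelihood, summed over
# observations and output dimensions,
#     log p(y; k, theta) = (k - 1)*log(y) - y/theta - k*log(theta) - lgamma(k),
# so maximizing entropy_loss is maximum-likelihood fitting of k(X) and
# theta(X); the returned rate = theta*k is the Gamma mean.

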
def _define_rate(self, Input=None):
    """
    Define the map lambda(x), the rate of the Poisson observations as a
    function of the latent state.

    If params.poisson_is_out_positive is True, the output of the map is made
    positive through a softplus nonlinearity. Otherwise, an exponential
    activation is applied to a linear layer.
    """
    params = self.params
    if Input is None:
        Input = self.X
    Nsamps = tf.shape(Input)[0]
    NTbins = tf.shape(Input)[1]
    xDim = self.xDim
    yDim = self.yDim
    Input = tf.reshape(Input, [Nsamps * NTbins, xDim], name='X_input')

    rangeY = params.initrange_outY
    self.inv_tau = inv_tau = params.poisson_inv_tau
    obs_nodes = 64
    fully_connected_layer = FullLayer()
    with tf.variable_scope("obs_nn", reuse=tf.AUTO_REUSE):
        if params.is_linear_output:
            full = fully_connected_layer(Input, yDim, 'linear',
                                         scope='output')
            rate_NTxD = tf.exp(inv_tau * full)
        else:
            full1 = fully_connected_layer(Input, obs_nodes, 'softplus',
                                          'full1')
            full2 = fully_connected_layer(full1, obs_nodes, 'softplus',
                                          'full2')
            if params.poisson_is_out_positive:
                rate_NTxD = fully_connected_layer(
                    full2, yDim, 'softplus', scope='output',
                    b_initializer=tf.random_normal_initializer(1.0, rangeY))
            else:
                full3 = fully_connected_layer(full2, yDim, 'linear',
                                              scope='output')
                # initializer=tf.random_uniform_initializer(-rangeY, rangeY))
                rate_NTxD = tf.exp(inv_tau * full3)
    self.rate_NxTxD = tf.reshape(rate_NTxD, [Nsamps, NTbins, yDim],
                                 name='outY')

    return rate_NTxD


import numpy as np
from layers.dataset import cifar100
from layers import (ConvLayer, FullLayer, FlattenLayer, MaxPoolLayer,
                    ReluLayer, SoftMaxLayer, CrossEntropyLayer, Sequential)


class cnn(object):

    class simple_cnn_model(object):
        def __init__(self, epochs, batch_size, lr):
            self.epochs = epochs
            self.batch_size = batch_size
            self.lr = lr

        def load_data(self):
            # load data from cifar100 folder
            (x_train, y_train), (x_test, y_test) = cifar100(1211506319)
            return x_train, y_train, x_test, y_test

        def train_model(self, layers, loss_metrics, x_train, y_train):
            # build model
            self.model = Sequential(layers, loss_metrics)
            # train the model
            loss = self.model.fit(x_train, y_train, self.epochs, self.lr,
                                  self.batch_size, print_output=True)
            avg_loss = np.mean(np.reshape(loss, (self.epochs, -1)), axis=1)
            return avg_loss

        def test_model(self, x_test, y_test):
            # make a prediction
            pred_result = self.model.predict(x_test)
            accuracy = np.mean(pred_result == y_test)
            return accuracy


if __name__ == '__main__':
    # define model parameters
    epochs = 15
    batch_size = 128
    lr = [.1]
    # define layers
    layers = (ConvLayer(3, 16, 3),
              ReluLayer(),
              MaxPoolLayer(),
              ConvLayer(16, 32, 3),
              ReluLayer(),
              MaxPoolLayer(),
              FlattenLayer(),
              FullLayer(2048, 4),
              SoftMaxLayer())
    loss_metrics = CrossEntropyLayer()
    # build and train model
    model = cnn.simple_cnn_model(epochs, batch_size, lr)
    x_train, y_train, x_test, y_test = model.load_data()
    loss = model.train_model(layers, loss_metrics, x_train, y_train)
    accuracy = model.test_model(x_test, y_test)
    print("loss: %s" % loss)
    print("The accuracy of the model is %s" % accuracy)


def mutate_full_layer(self, unit, eta):
    # num of hidden neurons, mean, std
    n_hidden = unit.hidden_neuron_num
    mean = unit.weight_matrix_mean
    std = unit.weight_matrix_std
    new_n_hidden = int(self.pm(self.hidden_neurons_range[0],
                               self.hidden_neurons_range[-1], n_hidden, eta))
    new_mean = self.pm(self.mean_range[0], self.mean_range[1], mean, eta)
    new_std = self.pm(self.std_range[0], self.std_range[1], std, eta)
    full_layer = FullLayer(hidden_neuron_num=new_n_hidden,
                           weight_matrix=[new_mean, new_std])
    return full_layer


def _define_evolution_network(self, Input=None):
    """
    """
    xDim = self.xDim
    if Input is None:
        Input = self.X
        Nsamps = self.Nsamps
        NTbins = self.NTbins
    else:
        Nsamps = tf.shape(Input)[0]
        NTbins = tf.shape(Input)[1]

    alpha = self.alpha
    rangeB = self.params.initrange_B
    evnodes = 200
    Input = tf.reshape(Input, [Nsamps * NTbins, xDim])
    fully_connected_layer = FullLayer(collections=['EVOLUTION_PARS'])
    with tf.variable_scope("ev_nn", reuse=tf.AUTO_REUSE):
        full1 = fully_connected_layer(Input, evnodes, 'softmax', 'full1')
        # full2 = fully_connected_layer(full1, evnodes//2, 'relu', 'full2',
        #                               initializer=tf.orthogonal_initializer())
        output = fully_connected_layer(
            full1, xDim**2, nl='linear', scope='output',
            initializer=tf.random_uniform_initializer(-rangeB, rangeB))
    B_NxTxdxd = tf.reshape(output, [Nsamps, NTbins, xDim, xDim], name='B')
    B_NTxdxd = tf.reshape(output, [Nsamps * NTbins, xDim, xDim])

    A_NTxdxd = alpha * B_NTxdxd + self.Alinear_dxd  # Broadcast

    X_norms = tf.norm(Input, axis=1)
    fl_mod = flow_modulator_tf(X_norms)
    eye_swap = tf.transpose(
        tf.tile(tf.expand_dims(tf.eye(self.xDim), 0),
                [Nsamps * NTbins, 1, 1]), [2, 1, 0])
    Awinflow_NTxdxd = tf.transpose(
        fl_mod * tf.transpose(A_NTxdxd, [2, 1, 0]) +
        0.9 * (1.0 - fl_mod) * eye_swap, [2, 1, 0])

    A_NxTxdxd = tf.reshape(A_NTxdxd, [Nsamps, NTbins, xDim, xDim], name='A')
    Awinflow_NxTxdxd = tf.reshape(Awinflow_NTxdxd,
                                  [Nsamps, NTbins, xDim, xDim],
                                  name='Awinflow')

    return A_NxTxdxd, Awinflow_NxTxdxd, B_NxTxdxd


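# Note (added, not in the original source): in terms of the quantities
# computed above, the locally linear evolution map and its inward-flowing
# variant are, for each latent point x_t,
#     A(x_t)         = A_linear + alpha * B(x_t)
#     A_winflow(x_t) = m(||x_t||) * A(x_t) + 0.9 * (1 - m(||x_t||)) * I,
# where m is the flow modulator returned by flow_modulator_tf. Far from the
# origin m -> 0 and the map contracts toward 0.9*I, which keeps sampled
# trajectories from blowing up.

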
def _define_input_to_latent(self, IInput=None):
    """
    Define the map f(I_t) in the evolution equation

        X_{t+1} = A_j(X_t, I_t)X_t + a_j*f(I_t)

    from the input to the state space shock. The index j represents the
    identity of the trial.

    Note that this type of shock is rather limited. In particular, the
    presence of an input I_t adds the same value f(I_t) independently of the
    state of the system.

    Args:
        IInput (tf.Tensor): Inputs at each time point

    Returns:
        Iterm_NxTxd: The shocks applied to the homogeneous evolution
            equation (see above) for each time and trial
    """
    iDim = self.iDim
    xDim = self.xDim
    IInput_NxTxi = self.I if IInput is None else IInput
    Nsamps = tf.shape(IInput_NxTxi)[0]
    NTbins = tf.shape(IInput_NxTxi)[1]
    Ids = self.Ids

    self.input_params_p = tf.get_variable(
        'input_params', shape=[self.num_diff_entities])
    tf.add_to_collection('INPUT', self.input_params_p)
    input_params_N = tf.gather(self.input_params_p, indices=Ids)

    IInput_NTxi = tf.reshape(IInput_NxTxi, [Nsamps*NTbins, iDim])
    fully_connected_layer = FullLayer(collections=['INPUT'])
    with tf.variable_scope("input_nn", reuse=tf.AUTO_REUSE):
        full1 = fully_connected_layer(IInput_NTxi, 128, 'relu', 'full1')
        full = fully_connected_layer(
            full1, xDim, 'linear', 'full',
            initializer=tf.random_normal_initializer(stddev=0.1))

    # put sample dimension last to broadcast
    Iterm_dxTxN = tf.transpose(tf.reshape(full, [Nsamps, NTbins, xDim]),
                               [2, 1, 0])
    Iterm_NxTxd = tf.transpose(input_params_N*Iterm_dxTxN, [2, 1, 0],
                               name='Iterm')

    return Iterm_NxTxd


def _define_input_to_latent(self, IInput=None):
    """
    Defines the map f(I_t) in the evolution equation

        X_{t+1} = A(X_t, I_t)X_t + f(I_t)

    from the input to the state space shock.

    Note that this type of shock is rather limited; in particular the effect
    of the additive term does not depend on the current state. The presence
    of an input I_t adds the same value f(I_t) no matter where the system is.
    """
    iDim = self.iDim
    xDim = self.xDim
    IInput_NxTxi = self.I if IInput is None else IInput
    Nsamps = tf.shape(IInput_NxTxi)[0]
    NTbins = tf.shape(IInput_NxTxi)[1]
    Ids = self.Ids

    self.input_params_p = tf.get_variable(
        'input_params', shape=[self.num_diff_entities])
    tf.add_to_collection('INPUT', self.input_params_p)
    input_params_N = tf.gather(self.input_params_p, indices=Ids)

    IInput_NTxi = tf.reshape(IInput_NxTxi, [Nsamps * NTbins, iDim])
    fully_connected_layer = FullLayer(collections=['INPUT'])
    with tf.variable_scope("input_nn", reuse=tf.AUTO_REUSE):
        full1 = fully_connected_layer(IInput_NTxi, 128, 'relu', 'full1')
        full = fully_connected_layer(
            full1, xDim, 'linear', 'full',
            initializer=tf.random_normal_initializer(stddev=0.1))

    # put sample dimension last to broadcast
    Iterm_dxTxN = tf.transpose(tf.reshape(full, [Nsamps, NTbins, xDim]),
                               [2, 1, 0])
    Iterm_NxTxd = tf.transpose(input_params_N * Iterm_dxTxN, [2, 1, 0],
                               name='Iterm')

    return Iterm_NxTxd


def input_to_latent(self, IInput=None):
    """
    """
    Nsamps = self.Nsamps
    NTbins = self.NTbins
    iDim = self.iDim
    xDim = self.xDim
    if IInput is None:
        IInput = self.I
    IInput = tf.reshape(IInput, [Nsamps * NTbins, iDim])

    in_nodes = 64
    fully_connected_layer = FullLayer()
    with tf.variable_scope("input_nn", reuse=tf.AUTO_REUSE):
        full1 = fully_connected_layer(IInput, in_nodes, iDim, 'softplus',
                                      'full1')
        full2 = fully_connected_layer(full1, in_nodes, in_nodes, 'softplus',
                                      'full2')
        full3 = fully_connected_layer(full2, xDim, in_nodes, 'linear')

    return tf.reshape(full3, [Nsamps, NTbins, xDim])


def bernoulliEncoding(Y, X, yDim, xDim, gen_nodes, learning_rate):
    DTYPE = tf.float32
    #yDim = tf.shape(Y)[1];
    #xDim = tf.shape(X)[1];
    fullyConnectedLayer = FullLayer()
    rangeRate1 = 1 / tf.sqrt(tf.cast(xDim, DTYPE))
    rangeRate2 = 1 / tf.sqrt(tf.cast(gen_nodes, DTYPE))
    with tf.variable_scope("ber_rate_nn", reuse=tf.AUTO_REUSE):
        full1 = fullyConnectedLayer(X, gen_nodes, nl='tanh', scope='full1',
                                    initializer=tf.random_uniform_initializer(
                                        minval=-rangeRate1, maxval=rangeRate1))
        full2 = fullyConnectedLayer(full1, gen_nodes, nl='tanh', scope='full2',
                                    initializer=tf.random_uniform_initializer(
                                        minval=-rangeRate2, maxval=rangeRate2))
        full = fullyConnectedLayer(full2, yDim, nl='linear', scope='output',
                                   initializer=tf.random_uniform_initializer(
                                       minval=-rangeRate2, maxval=rangeRate2))
        temp = tf.exp(full)
        rate = temp / (1 + temp)

    entropy_loss = tf.reduce_sum(Y * full - tf.log(1 + temp))  #-tf.reduce_sum((Y-rate)**2)#
    with tf.variable_scope("ber_adam", reuse=tf.AUTO_REUSE):
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(-entropy_loss)

    return optimizer, entropy_loss, rate


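# Note (added, not in the original source): with logits `full` and
# rate = sigmoid(full), the maximized sum(Y*full - log(1 + exp(full))) equals
# the Bernoulli log-likelihood sum(Y*log(rate) + (1 - Y)*log(1 - rate)),
# written in the logit form that avoids taking the log of a saturated sigmoid.

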
def sngRREncoding(Y, X, yDim, xDim, learning_rate, factor):
    DTYPE = tf.float32
    #yDim = tf.shape(Y)[1];
    #xDim = tf.shape(X)[1];
    fullyConnectedLayer = FullLayer()
    rangeRate1 = 1 / tf.sqrt(tf.cast(xDim, DTYPE))
    with tf.variable_scope("sngrr_rate_nn", reuse=tf.AUTO_REUSE):
        full_theta = fullyConnectedLayer(
            X, yDim, nl='linear', scope='output_theta',
            initializer=tf.random_uniform_initializer(minval=-rangeRate1,
                                                      maxval=rangeRate1))
        full_p = fullyConnectedLayer(
            X, yDim, nl='linear', scope='output_p',
            initializer=tf.random_uniform_initializer(minval=-rangeRate1,
                                                      maxval=rangeRate1))
        full_k = fullyConnectedLayer(
            X, yDim, nl='linear', scope='output_k',
            initializer=tf.random_uniform_initializer(minval=-rangeRate1,
                                                      maxval=rangeRate1))

    params = dict([("loc", tf.convert_to_tensor(factor, DTYPE))])
    with tf.variable_scope("sngrr_obsmodel", reuse=tf.AUTO_REUSE):
        #if "logk" in params:
        #    logk = tf.get_variable('logk', initializer=tf.cast(params["logk"], DTYPE), dtype=DTYPE)
        #else:
        #    logk = tf.get_variable('logk', initializer=tf.cast(tf.zeros(yDim), DTYPE), dtype=DTYPE)
        #k = tf.exp(logk) + 1e-7;
        if "loc" in params:
            loc = tf.cast(params["loc"], DTYPE)
        else:
            loc = tf.cast(tf.zeros(yDim), DTYPE)
        #self.loc = tf.minimum(self.loc, tf.cast(params["min_y"], DTYPE)-1e-6);
        k = tf.exp(full_k) + 1e-7
        theta = tf.exp(full_theta)
        p = tf.exp(full_p) / (1 + tf.exp(full_p))

    # now compute the entropy loss
    Nsamps = tf.shape(Y)[0]
    mask = tf.not_equal(Y, tf.zeros_like(Y))
    #k_NTxD = tf.reshape(tf.tile(k, [Nsamps]), [Nsamps, yDim]);
    loc_NTxD = tf.reshape(tf.tile(loc, [Nsamps]), [Nsamps, yDim])
    y_temp = tf.boolean_mask(Y, mask)
    r_temp = tf.boolean_mask(theta, mask)
    p_temp = tf.boolean_mask(p, mask)
    k_NTxD = tf.boolean_mask(k, mask)
    loc_NTxD = tf.boolean_mask(loc_NTxD, mask)
    p_temp = p_temp * (1 - 2e-6) + 1e-6
    r_temp = r_temp + 1e-6
    LY1 = tf.reduce_sum(tf.log(p_temp) - k_NTxD * tf.log(r_temp)
                        - (y_temp - loc_NTxD) / r_temp)
    LY2 = tf.reduce_sum(-tf.lgamma(k_NTxD)
                        + (k_NTxD - 1) * tf.log(y_temp - loc_NTxD))
    gr_temp = tf.boolean_mask(p, ~mask)
    LY3 = tf.reduce_sum(tf.log(1 - gr_temp))
    entropy_loss = LY1 + LY2 + LY3

    with tf.variable_scope("sngrr_adam", reuse=tf.AUTO_REUSE):
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(-entropy_loss)
    rate = (theta * k + loc) * p

    return optimizer, entropy_loss, theta, k, p, loc, rate


import numpy as np
import matplotlib.pyplot as plt
from layers import (FullLayer, ReluLayer, SoftMaxLayer, CrossEntropyLayer)
from layers.sequential import Sequential
from layers.dataset import cifar100

(x_train, y_train), (x_test, y_test) = cifar100(1213268041)

test_accuracy = []
epochs = 20
lr = [0.1, 0.01, 0.2]
for i in lr:
    model = Sequential(layers=(FullLayer(32 * 32 * 3, 2500),
                               ReluLayer(),
                               FullLayer(2500, 2000),
                               ReluLayer(),
                               FullLayer(2000, 1500),
                               ReluLayer(),
                               FullLayer(1500, 1000),
                               ReluLayer(),
                               FullLayer(1000, 500),
                               ReluLayer(),
                               FullLayer(500, 4),
                               SoftMaxLayer()),
                       loss=CrossEntropyLayer())
    finalloss = model.fit(x_train, y_train, epochs=epochs, lr=i)
    y_pred = model.predict(x_test)
    accuracy = np.mean(y_test == y_pred)
    print('Accuracy: %.2f' % accuracy)
    test_accuracy.append(accuracy)
    plt.plot(range(epochs), finalloss, label='Learning rate=%s' % i)


def get_Mu_Lambda(self, InputY):
    """
    """
    yDim = self.yDim
    xDim = self.xDim
    Nsamps = tf.shape(InputY)[0]
    NTbins = tf.shape(InputY)[1]

    rangeLambda = self.params.initrange_LambdaX
    rangeX = self.params.initrange_MuX
    rec_nodes = 60
    Y_input_NTxD = tf.reshape(InputY, [Nsamps * NTbins, yDim])
    fully_connected_layer = FullLayer()
    with tf.variable_scope("recog_nn_mu", reuse=tf.AUTO_REUSE):
        full1 = fully_connected_layer(
            Y_input_NTxD, rec_nodes, 'softplus', 'full1',
            initializer=tf.random_normal_initializer(stddev=rangeX))
        full2 = fully_connected_layer(
            full1, rec_nodes, 'softplus', 'full2',
            initializer=tf.random_normal_initializer(stddev=rangeX))
        Mu_NTxd = fully_connected_layer(full2, xDim, 'linear', 'output')
        Mu_NxTxd = tf.reshape(Mu_NTxd, [Nsamps, NTbins, xDim], name='MuX')

    with tf.variable_scope("recog_nn_lambda", reuse=tf.AUTO_REUSE):
        full1 = fully_connected_layer(
            Y_input_NTxD, rec_nodes, 'softplus', 'full1',
            initializer=tf.random_normal_initializer(stddev=rangeLambda))
        full2 = fully_connected_layer(
            full1, rec_nodes, 'softplus', 'full2',
            initializer=tf.random_normal_initializer(stddev=rangeLambda))
        full3 = fully_connected_layer(
            full2, xDim**2, 'linear', 'output',
            initializer=tf.orthogonal_initializer(gain=rangeLambda))
            # initializer=tf.random_uniform_initializer(-0.01, 0.01))
        LambdaChol_NTxdxd = tf.reshape(full3, [Nsamps * NTbins, xDim, xDim])
        Lambda_NTxdxd = tf.matmul(LambdaChol_NTxdxd, LambdaChol_NTxdxd,
                                  transpose_b=True)
        Lambda_NxTxdxd = tf.reshape(Lambda_NTxdxd,
                                    [Nsamps, NTbins, xDim, xDim],
                                    name='Lambda')

    LambdaMu_NTxd = tf.squeeze(tf.matmul(Lambda_NTxdxd,
                                         tf.expand_dims(Mu_NTxd, axis=2)),
                               axis=2)
    LambdaMu_NxTxd = tf.reshape(LambdaMu_NTxd, [Nsamps, NTbins, xDim])

    return Mu_NxTxd, Lambda_NxTxdxd, LambdaMu_NxTxd


def add_a_common_full_layer(self):
    mean = self.init_mean()
    std = self.init_std()
    full_layer = FullLayer(hidden_neuron_num=2, weight_matrix=[mean, std])
    return full_layer


""" Created on Sun Mar 25 19:52:43 2018 @author: kaushik """ import time import numpy as np import matplotlib.pyplot as plt from layers.dataset import cifar100 from layers import (ConvLayer, FullLayer, FlattenLayer, MaxPoolLayer, ReluLayer, SoftMaxLayer, CrossEntropyLayer, Sequential) (x_train, y_train), (x_test, y_test) = cifar100(1337) model = Sequential(layers=(ConvLayer(3, 16, 3), ReluLayer(), MaxPoolLayer(), ConvLayer(16, 32, 3), ReluLayer(), MaxPoolLayer(), FlattenLayer(), FullLayer(8 * 8 * 32, 4), SoftMaxLayer()), loss=CrossEntropyLayer()) start_time = time.clock() lr_vals = [0.1] losses_train = list() losses_test = list() test_acc = np.zeros(len(lr_vals)) for j in range(len(lr_vals)): train_loss, test_loss = model.fit(x_train, y_train, x_test, y_test, epochs=8, lr=lr_vals[j], batch_size=128) losses_train.append(train_loss)
def _define_mean_variance(self, Input=None):
    """
    Define the mean and variance of the Gaussian Observation Model

    Returns:
        A pair containing:

        - MuY_NxTxD: The means of the Gaussian Observation Model
        - SigmaInv_DxD: The variance of the Gaussian Observation Model
    """
    params = self.params
    if Input is None:
        Input = self.X
    xDim = self.xDim
    yDim = self.yDim
    Nsamps = tf.shape(Input)[0]
    NTbins = tf.shape(Input)[1]
    Input = tf.reshape(Input, [Nsamps * NTbins, xDim], name='X_input')

    rangeY = tf.get_variable('rangeY',
                             initializer=params.initrange_Goutmean)
    initSigma = params.initrange_Goutvar
    init_b = params.initbias_Goutmean
    obs_nodes = 64
    fully_connected_layer = FullLayer()
    with tf.variable_scope("obs_nn_mean", reuse=tf.AUTO_REUSE):
        if params.is_identity_output:
            MuY_NTxD = rangeY * Input[:, :yDim]
        elif params.is_linear_output:
            MuY_NTxD = fully_connected_layer(Input, yDim, 'linear',
                                             scope='output')
        else:
            full1 = fully_connected_layer(Input, obs_nodes, 'softplus',
                                          'full1')
            full2 = fully_connected_layer(full1, obs_nodes, 'softplus',
                                          'full2')
            MuY_NTxD = fully_connected_layer(
                full2, yDim, 'linear', 'output',
                # initializer=tf.random_uniform_initializer(-rangeY, rangeY),
                b_initializer=tf.random_normal_initializer(init_b))
    MuY_NxTxD = tf.reshape(MuY_NTxD, [Nsamps, NTbins, yDim], name='outY')

    with tf.variable_scope("obs_var", reuse=tf.AUTO_REUSE):
        SigmaInvChol_DxD = tf.get_variable(
            'SigmaInvChol',
            initializer=tf.cast(initSigma * tf.eye(yDim), DTYPE))
    self.SigmaChol_1x1xDxD = tf.reshape(
        tf.matrix_inverse(SigmaInvChol_DxD),
        [1, 1, yDim, yDim])  # Needed only for sampling
    SigmaInv_DxD = tf.matmul(SigmaInvChol_DxD, SigmaInvChol_DxD,
                             transpose_b=True)

    return MuY_NxTxD, SigmaInv_DxD


import numpy as np
from layers.dataset import cifar100

# Please make sure that cifar-100-python is present in the same folder as dataset.py
(x_train, y_train), (x_test, y_test) = cifar100(1212356299)
from layers import (FullLayer, ReluLayer, SoftMaxLayer, CrossEntropyLayer,
                    Sequential_better)

model = Sequential_better(layers=(FullLayer(3072, 1500),
                                  ReluLayer(),
                                  FullLayer(1500, 500),
                                  ReluLayer(),
                                  FullLayer(500, 4),
                                  SoftMaxLayer()),
                          loss=CrossEntropyLayer())

loss2 = model.fit(x_train, y_train, lr=0.1, epochs=15)
y_predict = model.predict(x_test)
count = 0
for i in range(np.size(y_test)):
    if y_predict[i] == y_test[i]:
        count += 1
accuracy = (100.0 * count) / np.shape(y_predict)[0]
print("Accuracy of better CIFAR = ", accuracy, "%")


import numpy as np
from layers.dataset import cifar100
import matplotlib.pyplot as plt

# Please make sure that cifar-100-python is present in the same folder as dataset.py
(x_train, y_train), (x_test, y_test) = cifar100(1212356299)
from layers import (FullLayer, ReluLayer, SoftMaxLayer, CrossEntropyLayer,
                    Sequential)

model = Sequential(layers=(FullLayer(3072, 500),
                           ReluLayer(),
                           FullLayer(500, 4),
                           SoftMaxLayer()),
                   loss=CrossEntropyLayer())
lr_accuracies = np.zeros((3, ))

loss1 = model.fit(x_train, y_train, lr=0.01, epochs=15)
y_predict = model.predict(x_test)
count = 0
for i in range(np.size(y_test)):
    if y_predict[i] == y_test[i]:
        count += 1
lr_accuracies[0] = (100.0 * count) / np.shape(y_predict)[0]

loss2 = model.fit(x_train, y_train, lr=0.1, epochs=15)
y_predict = model.predict(x_test)


def sngEncoding(Y, X, yDim, xDim, learning_rate, factor, gamma_np):
    DTYPE = tf.float32
    #yDim = tf.shape(Y)[1];
    #xDim = tf.shape(X)[1];
    fullyConnectedLayer = FullLayer()
    rangeRate1 = 1 / tf.sqrt(tf.cast(xDim, DTYPE))
    with tf.variable_scope("sng_rate_nn", reuse=tf.AUTO_REUSE):
        full = fullyConnectedLayer(X, yDim, nl='linear', scope='output',
                                   initializer=tf.random_uniform_initializer(
                                       minval=-rangeRate1, maxval=rangeRate1))

    #mean_temp, var_temp = tf.nn.moments(Y, axes=0);
    #gamma_temp = mean_temp/var_temp;
    #gamma_temp = tf.reduce_mean(Y, axis=0)/#Y.mean(axis=0)/Y.var(axis=0)
    gamma_temp = tf.cast(tf.log(gamma_np), DTYPE)
    params = dict([("loggamma", gamma_temp),
                   ("loc", tf.convert_to_tensor(factor, DTYPE))])
    with tf.variable_scope("sng_obsmodel", reuse=tf.AUTO_REUSE):
        if "loggamma" in params:
            loggamma = tf.get_variable(
                'loggamma',
                initializer=tf.cast(params["loggamma"], DTYPE),
                dtype=DTYPE)
        else:
            loggamma = tf.get_variable(
                'loggamma',
                initializer=tf.cast(tf.zeros(yDim), DTYPE),
                dtype=DTYPE)
        gamma = tf.exp(loggamma) + 1e-7
        if "logk" in params:
            logk = tf.get_variable(
                'logk',
                initializer=tf.cast(params["logk"], DTYPE),
                dtype=DTYPE)
        else:
            logk = tf.get_variable(
                'logk',
                initializer=tf.cast(tf.zeros(yDim), DTYPE),
                dtype=DTYPE)
        k = tf.exp(logk) + 1e-7
        if "loc" in params:
            loc = tf.cast(params["loc"], DTYPE)
        else:
            loc = tf.cast(tf.zeros(yDim), DTYPE)
        #self.loc = tf.minimum(self.loc, tf.cast(params["min_y"], DTYPE)-1e-6);
        rate = tf.exp(full)

    # now compute the entropy loss
    Nsamps = tf.shape(Y)[0]
    mask = tf.not_equal(Y, tf.zeros_like(Y))
    k_NTxD = tf.reshape(tf.tile(k, [Nsamps]), [Nsamps, yDim])
    loc_NTxD = tf.reshape(tf.tile(loc, [Nsamps]), [Nsamps, yDim])
    gamma_rate = rate * gamma
    y_temp = tf.boolean_mask(Y, mask)
    r_temp = tf.boolean_mask(rate, mask)
    gr_temp = tf.boolean_mask(gamma_rate, mask)
    p_temp = 1 - tf.exp(-gr_temp)
    k_NTxD = tf.boolean_mask(k_NTxD, mask)
    loc_NTxD = tf.boolean_mask(loc_NTxD, mask)
    r_temp = r_temp - loc_NTxD * p_temp
    p_temp = p_temp * (1 - 2e-6) + 1e-6
    r_temp = r_temp + 1e-6
    LY1 = tf.reduce_sum((k_NTxD + 1) * tf.log(p_temp)
                        - k_NTxD * tf.log(r_temp)
                        - (y_temp - loc_NTxD) * k_NTxD * p_temp / r_temp)
    LY2 = tf.reduce_sum((k_NTxD * tf.log(k_NTxD) - tf.lgamma(k_NTxD))
                        + (k_NTxD - 1) * tf.log(y_temp - loc_NTxD))
    gr_temp = tf.boolean_mask(gamma_rate, ~mask)
    LY3 = -tf.reduce_sum(gr_temp)
    entropy_loss = LY1 + LY2 + LY3

    with tf.variable_scope("sng_adam", reuse=tf.AUTO_REUSE):
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(-entropy_loss)

    return optimizer, entropy_loss, rate, k, gamma, loc


def _define_evolution_network_wi(self, X=None, Ids=None, Inputs=None):
    """
    Define the evolution network for each of a set of NsampsxNTbins latent
    points and Ids.

    Args:
        X (tf.Tensor):
        Ids (tf.Tensor):
        Inputs (tf.Tensor):

    Returns:
        A tuple containing:

        - A_NxTxdxd: The locally linear evolution map for each latent state
            provided.
        - Awinflow_NxTxdxd: The locally linear evolution map for each latent
            state, to which an inwards flow from infinity has been added.
            This guarantees a nonlinear dynamics that does not blow up. This
            is only used for the generation of synthetic data, not for
            training.
        - B_NxTxdxd: The state-dependent piece of A_NxTxdxd:
            A(X) = A_0 + alpha*B(X). B_NxTxdxd can always be derived from
            A_NxTxdxd but it is often handy to just get it from here.
    """
    params = self.params
    xDim = self.xDim
    pDim = self.pDim
    if params.with_inputs:
        iDim = self.iDim
    rDim = xDim + pDim + iDim if params.with_mod_dynamics else xDim + pDim
    if X is None and Ids is not None:
        raise ValueError("Must provide an X for these Ids")
    X_NxTxd = self.X if X is None else X
    Nsamps = tf.shape(X_NxTxd)[0]
    NTbins = tf.shape(X_NxTxd)[1]
    if Ids is None:
        Ids = self.Ids

    ev_params_Nxp = tf.gather(self.ev_params_Pxp, indices=Ids)
    # expand according to Ids
    ev_params_NxTxp = tf.tile(tf.expand_dims(ev_params_Nxp, axis=1),
                              [1, NTbins, 1])
    if params.with_mod_dynamics:
        Inputs_NxTxi = self.I if Inputs is None else Inputs
        State_NxTxr = tf.concat([X_NxTxd, ev_params_NxTxp, Inputs_NxTxi],
                                axis=2)  # add params and inputs to state
    else:
        State_NxTxr = tf.concat([X_NxTxd, ev_params_NxTxp], axis=2)
    State_NTxr = tf.reshape(State_NxTxr, [Nsamps*NTbins, rDim])

    rangeB = self.params.initrange_B
    evnodes = 200
    fully_connected_layer = FullLayer(collections=['EVOLUTION_PARS'])
    with tf.variable_scope("ev_nn", reuse=tf.AUTO_REUSE):
        full1 = fully_connected_layer(State_NTxr, evnodes, 'softmax', 'full1')
        full2 = fully_connected_layer(full1, evnodes//2, 'softplus', 'full2',
                                      initializer=tf.orthogonal_initializer())
        output = fully_connected_layer(
            full2, xDim**2, nl='linear', scope='output',
            initializer=tf.random_uniform_initializer(-rangeB, rangeB))
    B_NxTxdxd = tf.reshape(output, [Nsamps, NTbins, xDim, xDim], name='B')
    B_NTxdxd = tf.reshape(output, [Nsamps*NTbins, xDim, xDim])

    A_NTxdxd = self.alpha*B_NTxdxd + self.Alinear_dxd  # Broadcast
    A_NxTxdxd = tf.reshape(A_NTxdxd, [Nsamps, NTbins, xDim, xDim], name='A')

    X_norms = tf.norm(State_NTxr[:, :xDim], axis=1)
    fl_mod = flow_modulator_tf(X_norms)
    eye_swap = tf.transpose(tf.tile(tf.expand_dims(tf.eye(self.xDim), 0),
                                    [Nsamps*NTbins, 1, 1]), [2, 1, 0])
    Awinflow_NTxdxd = tf.transpose(
        fl_mod*tf.transpose(A_NTxdxd, [2, 1, 0]) +
        0.9*(1.0 - fl_mod)*eye_swap, [2, 1, 0])
    Awinflow_NxTxdxd = tf.reshape(Awinflow_NTxdxd,
                                  [Nsamps, NTbins, xDim, xDim],
                                  name='Awinflow')

    return A_NxTxdxd, Awinflow_NxTxdxd, B_NxTxdxd


def get_Mu_Lambda(self, InputY):
    """
    Define the mappings Mu(Y) and Lambda(Y) for the mean and precision of
    the Recognition Distribution respectively.

    Args:
        InputY (tf.Tensor): The observation tensor.

    Returns:
        A tuple containing:

        - Mu_NxTxd: The mean of the Recognition Distribution.
        - Lambda_NxTxdxd: The precision of the Recognition Distribution.
        - LambdaMu_NxTxd: The matrix product Lambda*Mu from the precision
            and the mean. Useful for the Inference Algorithm.
    """
    yDim = self.yDim
    xDim = self.xDim
    Nsamps = tf.shape(InputY)[0]
    NTbins = tf.shape(InputY)[1]

    rangeLambda = self.params.initrange_LambdaX
    rangeX = self.params.initrange_MuX
    rec_nodes = 60
    Y_input_NTxD = tf.reshape(InputY, [Nsamps * NTbins, yDim])
    fcl = FullLayer()
    with tf.variable_scope("recog_nn_mu", reuse=tf.AUTO_REUSE):
        full1 = fcl(Y_input_NTxD, rec_nodes, 'softplus', 'full1',
                    initializer=tf.random_normal_initializer(stddev=rangeX))
        full2 = fcl(full1, rec_nodes, 'softplus', 'full2',
                    initializer=tf.random_normal_initializer(stddev=rangeX))
        Mu_NTxd = fcl(full2, xDim, 'linear', 'output')
        Mu_NxTxd = tf.reshape(Mu_NTxd, [Nsamps, NTbins, xDim], name='MuX')

    with tf.variable_scope("recog_nn_lambda", reuse=tf.AUTO_REUSE):
        full1 = fcl(Y_input_NTxD, rec_nodes, 'softplus', 'full1',
                    initializer=tf.random_normal_initializer(
                        stddev=rangeLambda))
        full2 = fcl(full1, rec_nodes, 'softplus', 'full2',
                    initializer=tf.random_normal_initializer(
                        stddev=rangeLambda))
        full3 = fcl(full2, xDim**2, 'linear', 'output',
                    initializer=tf.orthogonal_initializer(gain=rangeLambda))
                    # initializer=tf.random_uniform_initializer(-0.01, 0.01))
        LambdaChol_NTxdxd = tf.reshape(full3, [Nsamps * NTbins, xDim, xDim])
        Lambda_NTxdxd = tf.matmul(LambdaChol_NTxdxd, LambdaChol_NTxdxd,
                                  transpose_b=True)
        Lambda_NxTxdxd = tf.reshape(Lambda_NTxdxd,
                                    [Nsamps, NTbins, xDim, xDim],
                                    name='Lambda')

    LambdaMu = tf.matmul(Lambda_NTxdxd, tf.expand_dims(Mu_NTxd, axis=2))
    LambdaMu_NTxd = tf.squeeze(LambdaMu, axis=2)
    LambdaMu_NxTxd = tf.reshape(LambdaMu_NTxd, [Nsamps, NTbins, xDim])

    return Mu_NxTxd, Lambda_NxTxdxd, LambdaMu_NxTxd


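# Note (added, not in the original source): the precision is parametrized
# through a Cholesky-like factor, Lambda(Y) = L(Y) L(Y)^T with L the network
# output reshaped to d x d, which keeps Lambda(Y) positive semi-definite for
# any network output. LambdaMu = Lambda*Mu is the natural-parameter form that
# the inference algorithm consumes directly.

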
def _define_evolution_network_wi(self, X=None, Ids=None, Inputs=None):
    """
    """
    params = self.params
    xDim = self.xDim
    pDim = self.pDim
    if params.with_inputs:
        iDim = self.iDim
    rDim = xDim + pDim + iDim if params.with_mod_dynamics else xDim + pDim
    if X is None and Ids is not None:
        raise ValueError("Must provide an X for these Ids")
    X_NxTxd = self.X if X is None else X
    if Ids is None:
        Ids = self.Ids
    Nsamps = tf.shape(X_NxTxd)[0]
    NTbins = tf.shape(X_NxTxd)[1]

    # Expand the parameters according to the provided trial Ids
    ev_params_Nxp = tf.gather(self.ev_params_Pxp, indices=Ids)
    ev_params_NxTxp = tf.tile(tf.expand_dims(ev_params_Nxp, axis=1),
                              [1, NTbins, 1])
    if params.with_mod_dynamics:
        Inputs_NxTxi = self.I if Inputs is None else Inputs

    rangeB = self.params.initrange_B
    evnodes = 200
    # Concatenate the parameters to the state at time t
    State_NxTxr = (tf.concat([X_NxTxd, ev_params_NxTxp, Inputs_NxTxi],
                             axis=2) if params.with_mod_dynamics else
                   tf.concat([X_NxTxd, ev_params_NxTxp], axis=2))
    State_NTxr = tf.reshape(State_NxTxr, [Nsamps * NTbins, rDim])
    fully_connected_layer = FullLayer(collections=['EVOLUTION_PARS'])
    with tf.variable_scope("ev_nn", reuse=tf.AUTO_REUSE):
        full1 = fully_connected_layer(State_NTxr, evnodes, 'softmax',
                                      'full1')
        full2 = fully_connected_layer(
            full1, evnodes // 2, 'softplus', 'full2',
            initializer=tf.orthogonal_initializer())
        output = fully_connected_layer(
            full2, xDim**2, nl='linear', scope='output',
            initializer=tf.random_uniform_initializer(-rangeB, rangeB))
    B_NxTxdxd = tf.reshape(output, [Nsamps, NTbins, xDim, xDim], name='B')
    B_NTxdxd = tf.reshape(output, [Nsamps * NTbins, xDim, xDim])

    A_NTxdxd = self.alpha * B_NTxdxd + self.Alinear_dxd  # Broadcast
    A_NxTxdxd = tf.reshape(A_NTxdxd, [Nsamps, NTbins, xDim, xDim], name='A')

    X_norms = tf.norm(State_NTxr[:, :xDim], axis=1)
    fl_mod = flow_modulator_tf(X_norms)
    eye_swap = tf.transpose(
        tf.tile(tf.expand_dims(tf.eye(self.xDim), 0),
                [Nsamps * NTbins, 1, 1]), [2, 1, 0])
    Awinflow_NTxdxd = tf.transpose(
        fl_mod * tf.transpose(A_NTxdxd, [2, 1, 0]) +
        0.9 * (1.0 - fl_mod) * eye_swap, [2, 1, 0])
    Awinflow_NxTxdxd = tf.reshape(Awinflow_NTxdxd,
                                  [Nsamps, NTbins, xDim, xDim],
                                  name='Awinflow')

    return A_NxTxdxd, Awinflow_NxTxdxd, B_NxTxdxd