def main():
    # generate dummy dataset
    nframes = 10000
    dataset = np.random.normal(loc=np.zeros(input_dim), scale=np.ones(input_dim),
                               size=(nframes, input_dim))

    # split into train and test portion
    ntest = 1000
    X_train = dataset[:-ntest, :]  # all but last 1000 samples for training
    X_test = dataset[-ntest:, :]   # last 1000 samples for testing
    assert X_train.shape[0] >= X_test.shape[0], 'Train set should be at least size of test set!'

    print('Creating training model...')
    # if persistent is True, you need to specify batch_size
    rbm = RBM(hidden_dim, input_dim=input_dim, init=glorot_uniform_sigm,
              hidden_unit_type='binary', visible_unit_type='gaussian',
              nb_gibbs_steps=nb_gibbs_steps, persistent=True,
              batch_size=batch_size, dropout=0.5)

    train_model = Sequential()
    train_model.add(rbm)

    opt = SGD(lr, 0., decay=0.0, nesterov=False)
    loss = rbm.contrastive_divergence_loss
    metrics = [rbm.reconstruction_loss]

    logger = UnsupervisedLoss2Logger(X_train, X_test, rbm.free_energy_gap,
                                     verbose=1, label='free_eng_gap',
                                     batch_size=batch_size)
    callbacks = [logger]

    # compile theano graph
    print('Compiling Theano graph...')
    train_model.compile(optimizer=opt, loss=loss, metrics=metrics)

    # do training
    print('Training...')
    train_model.fit(X_train, X_train, batch_size=batch_size, nb_epoch=nb_epoch,
                    verbose=1, shuffle=False, callbacks=callbacks)

    # generate hidden features from input data
    print('Creating inference model...')
    h_given_x = rbm.get_h_given_x_layer(as_initial_layer=True)

    inference_model = Sequential()
    inference_model.add(h_given_x)
    #inference_model.add(SampleBernoulli(mode='maximum_likelihood'))

    print('Compiling Theano graph...')
    inference_model.compile(opt, loss='mean_squared_error')

    print('Doing inference...')
    h = inference_model.predict(dataset)
    print(h)

    print('Done!')
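# The listing above references several module-level names (input_dim, hidden_dim,
# batch_size, nb_epoch, nb_gibbs_steps, lr) and imports that are not part of the
# snippet itself. A minimal sketch of what they might look like follows; the
# numeric values are illustrative assumptions, not the original settings, and the
# import path of the RBM extension package (RBM, glorot_uniform_sigm,
# UnsupervisedLoss2Logger) depends on how that package is installed.
import numpy as np
from keras.models import Sequential
from keras.optimizers import SGD

input_dim = 100       # dimensionality of each input frame (assumed)
hidden_dim = 200      # number of hidden units in the RBM (assumed)
batch_size = 128      # required whenever persistent=True (assumed)
nb_epoch = 10         # training epochs (assumed)
nb_gibbs_steps = 10   # CD-k Gibbs steps per update (assumed)
lr = 0.001            # SGD learning rate (assumed)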
def main():
    # generate dummy dataset
    nframes = 10000
    dataset = np.random.normal(loc=np.zeros(input_dim), scale=np.ones(input_dim),
                               size=(nframes, input_dim))

    # split into train and test portion
    ntest = 1000
    X_train = dataset[:-ntest, :]  # all but last 1000 samples for training
    X_test = dataset[-ntest:, :]   # last 1000 samples for testing
    assert X_train.shape[0] >= X_test.shape[0], 'Train set should be at least size of test set!'

    # setup model structure
    print('Creating training model...')
    rbm1 = RBM(hidden_dim[0], input_dim=input_dim, init=glorot_uniform_sigm,
               visible_unit_type='gaussian', hidden_unit_type='binary',
               nb_gibbs_steps=nb_gibbs_steps, persistent=True,
               batch_size=batch_size, dropout=dropouts[0])
    rbm2 = RBM(hidden_dim[1], input_dim=hidden_dim[0], init=glorot_uniform_sigm,
               visible_unit_type='binary', hidden_unit_type='binary',
               nb_gibbs_steps=nb_gibbs_steps, persistent=True,
               batch_size=batch_size, dropout=dropouts[1])
    # when using the nrlu hidden unit, the nb_gibbs_steps and persistent params are ignored
    rbm3 = RBM(hidden_dim[2], input_dim=hidden_dim[1], init=glorot_uniform_sigm,
               visible_unit_type='binary', hidden_unit_type='nrlu',
               nb_gibbs_steps=1, persistent=False,
               batch_size=batch_size, dropout=dropouts[2])

    rbms = [rbm1, rbm2, rbm3]
    dbn = DBN(rbms)

    # setup optimizer, loss
    def get_layer_loss(rbm, layer_no):
        return rbm.contrastive_divergence_loss

    def get_layer_optimizer(layer_no):
        return SGD((layer_no + 1) * lr, 0., decay=0.0, nesterov=False)

    metrics = []
    for rbm in rbms:
        metrics.append([rbm.reconstruction_loss])

    dbn.compile(layer_optimizer=get_layer_optimizer, layer_loss=get_layer_loss, metrics=metrics)

    # do training
    print('Training...')
    dbn.fit(X_train, batch_size, nb_epoch, verbose=1, shuffle=False)

    # generate hidden features from input data
    print('Creating inference model...')
    F = dbn.get_forward_inference_layers()
    B = dbn.get_backward_inference_layers()
    inference_model = Sequential()
    for f in F:
        inference_model.add(f)
        inference_model.add(SampleBernoulli(mode='random'))
    for b in B[:-1]:
        inference_model.add(b)
        inference_model.add(SampleBernoulli(mode='random'))
    # last layer is a gaussian layer
    inference_model.add(B[-1])

    print('Compiling Theano graph...')
    opt = SGD()
    inference_model.compile(opt, loss='mean_squared_error')

    print('Doing inference...')
    h = inference_model.predict(dataset)
    print(h)

    print('Done!')
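# In the DBN listing above, hidden_dim and dropouts are per-layer lists rather than
# the scalars used in the single-RBM example. A sketch of the assumed shapes, with
# illustrative values only:
hidden_dim = [500, 250, 100]  # units in rbm1, rbm2 and rbm3 (assumed)
dropouts = [0.0, 0.0, 0.0]    # per-layer dropout rates (assumed)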
input_dim = cos.shape[2]
print('Input shape:', cos.shape)

expected_output = np.zeros((len(cos), 1))
for i in range(len(cos) - lahead):
    expected_output[i, 0] = np.mean(cos[i + 1:i + lahead + 1])

print('Output shape')
print(expected_output.shape)

print('Training Pretrain RBM')
rbm = RBM(hidden_dim,
          init=glorot_uniform_sigm,
          input_dim=input_dim,
          hidden_unit_type='binary',
          visible_unit_type='gaussian',
          persistent=True,
          batch_size=batch_size,
          nb_gibbs_steps=10)

model = Sequential()
model.add(Flatten(batch_input_shape=(batch_size, tsteps, input_dim)))
model.add(rbm)

opt = RMSprop(lr=lr)
model.compile(loss=rbm.contrastive_divergence_loss,
              optimizer=opt,
              metrics=[rbm.reconstruction_loss])
model.summary()
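# The snippet above assumes a 3-D input array `cos` of shape (nsamples, tsteps, 1)
# together with the constants tsteps, lahead, batch_size, hidden_dim and lr. A
# minimal sketch of how such an input could be produced (a decaying, noisy cosine,
# in the spirit of the stock Keras stateful-RNN example) is given below; the helper
# gen_cosine and all values are hypothetical, not part of the original code.
import numpy as np

tsteps = 1         # timesteps per sample (assumed)
lahead = 1         # how far ahead the target mean looks (assumed)
batch_size = 25    # must divide the number of samples (assumed)
hidden_dim = 50    # RBM hidden units (assumed)
lr = 0.001         # RMSprop learning rate (assumed)

def gen_cosine(amp=100, period=25, n=50000):
    # hypothetical helper: decaying cosine plus a little uniform noise,
    # reshaped to (n, tsteps, 1) so that input_dim = cos.shape[2] == 1
    idx = np.arange(n)
    signal = amp * np.cos(2 * np.pi * idx / period) / (1.0 + idx / float(n))
    signal += 0.05 * amp * np.random.uniform(-1.0, 1.0, n)
    return signal.reshape(n, tsteps, 1)

cos = gen_cosine()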
def main():
    # grab input data set and set up dataset here
    X_train = []
    X_test = []

    print('Creating training model')
    # start with a GBRBM, followed by 5 more RBMs (5*2 = 10 hidden layers)
    dbn = DBN([
        GBRBM(input_dim, internal_dim, init=glorot_uniform_sigm),
        RBM(internal_dim, internal_dim, init=glorot_uniform_sigm),
        RBM(internal_dim, internal_dim, init=glorot_uniform_sigm),
        RBM(internal_dim, internal_dim, init=glorot_uniform_sigm),
        RBM(internal_dim, internal_dim, init=glorot_uniform_sigm),
        RBM(internal_dim, internal_dim, init=glorot_uniform_sigm)
    ])

    def get_layer_loss(rbm, layer_no):
        return rbm.contrastive_divergence_loss(nb_gibbs_steps=1)

    def get_layer_optimizer(layer_no):
        return SGD((layer_no + 1) * lr, 0., decay=0.0, nesterov=False)

    dbn.compile(layer_optimizer=get_layer_optimizer, layer_loss=get_layer_loss)

    # train off token vectors from early version of software
    print('Training')
    begin_time = time.time()
    dbn.fit(X_train, batch_size, nb_epoch, verbose=1, shuffle=False)
    end_time = time.time()
    print('Training took %f minutes' % ((end_time - begin_time) / 60.0))

    # save model parameters from training
    print('Saving model')
    dbn.save_weights('dbn_weights.hdf5', overwrite=True)

    # load model from save
    print('Loading model')
    dbn.load_weights('dbn_weights.hdf5')

    # generate hidden features from input data
    print('Creating inference model')
    F = dbn.get_forward_inference_layers()
    B = dbn.get_backward_inference_layers()
    inference_model = Sequential()
    for f in F:
        inference_model.add(f)
        inference_model.add(SampleBernoulli(mode='random'))
    for b in B[:-1]:
        inference_model.add(b)
        inference_model.add(SampleBernoulli(mode='random'))
    # last layer is a gaussian layer
    inference_model.add(B[-1])

    print('Compiling Theano graph')
    opt = SGD()
    inference_model.compile(opt, loss='mean_squared_error')

    print('Doing inference')
    h = inference_model.predict(X_test)
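# Inside main() above, X_train and X_test are left as empty placeholders. Before
# dbn.fit() and inference_model.predict() will run, they need to be 2-D float
# arrays of shape (nsamples, input_dim). A dummy stand-in with assumed dimensions
# (input_dim and internal_dim are not defined in the snippet either):
import numpy as np

input_dim = 64       # token-vector dimensionality (assumed)
internal_dim = 128   # width of the intermediate RBM layers (assumed)
X_train = np.random.normal(size=(9000, input_dim)).astype('float32')
X_test = np.random.normal(size=(1000, input_dim)).astype('float32')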
class RNNRBM(Recurrent):
    def __init__(self, hidden_dim, hidden_recurrent_dim,
                 init='glorot_uniform', inner_init='orthogonal', activation='tanh',
                 W_regularizer=None, U_regularizer=None, b_regularizer=None,
                 dropout_W=0., dropout_U=0.,
                 nb_gibbs_steps=1, persistent=False, finetune=False,
                 Wrbm_regularizer=None, rbm=None, dropout_RBM=0.,
                 **kwargs):
        self.init = initializers.get(init)
        self.init_rbm = glorot_uniform_sigm
        self.inner_init = initializers.get(inner_init)
        self.activation = activations.get(activation)
        self.W_regularizer = regularizers.get(W_regularizer)
        self.U_regularizer = regularizers.get(U_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        self.dropout_W, self.dropout_U = dropout_W, dropout_U
        self.dropout_RBM = dropout_RBM
        self.Wrbm_regularizer = regularizers.get(Wrbm_regularizer)
        self.rbm = rbm

        if self.dropout_W or self.dropout_U or self.dropout_RBM:
            self.uses_learning_phase = True

        self.supports_masking = True
        super(RNNRBM, self).__init__(**kwargs)

        self.finetune = finetune
        self.hidden_dim = hidden_dim
        self.hidden_recurrent_dim = hidden_recurrent_dim
        self.nb_gibbs_steps = nb_gibbs_steps
        self.persistent = persistent

    def get_output_shape_for(self, input_shape):
        #assert input_shape and len(input_shape) == 2
        return (input_shape[0], self.output_dim)

    def build(self, input_shape):
        self.input_spec = [InputSpec(shape=input_shape)]
        input_dim = input_shape[2]
        self.input_dim = input_dim

        if self.stateful:
            self.reset_states()
        else:
            self.states = [None, None, None]
        self.states_dim = [self.hidden_recurrent_dim, self.input_dim, self.hidden_dim]

        if not self.finetune:
            self.output_dim = self.input_dim
        else:
            self.output_dim = self.hidden_dim

        if not hasattr(self, 'W'):
            self.W = self.add_weight((input_dim, self.hidden_recurrent_dim),
                                     initializer=self.init,
                                     name='{}_W'.format(self.name),
                                     regularizer=self.W_regularizer)
            self.U = self.add_weight((self.hidden_recurrent_dim, self.hidden_recurrent_dim),
                                     initializer=self.inner_init,
                                     name='{}_U'.format(self.name),
                                     regularizer=self.U_regularizer)
            self.b = self.add_weight((self.hidden_recurrent_dim,),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer)

            if self.initial_weights is not None:
                self.set_weights(self.initial_weights)
                del self.initial_weights

            if self.rbm:
                self.Wrbm = self.rbm.Wrbm
                self.bv = self.rbm.bx
                self.bh = self.rbm.bh
            else:
                self.Wrbm = self.add_weight((input_dim, self.hidden_dim),
                                            initializer=self.init_rbm,
                                            name='{}_Wrbm'.format(self.name),
                                            regularizer=self.Wrbm_regularizer)
                self.bv = self.add_weight((self.input_dim,),
                                          initializer='zero',
                                          name='{}_bv'.format(self.name),
                                          regularizer=None)
                self.bh = self.add_weight((self.hidden_dim,),
                                          initializer='zero',
                                          name='{}_bh'.format(self.name),
                                          regularizer=None)

            self.Wuv = self.add_weight((self.hidden_recurrent_dim, input_dim),
                                       initializer=self.init,
                                       name='{}_Wuv'.format(self.name),
                                       regularizer=None)
            self.Wuh = self.add_weight((self.hidden_recurrent_dim, self.hidden_dim),
                                       initializer=self.init,
                                       name='{}_Wuh'.format(self.name),
                                       regularizer=None)

        self.trainable_weights = [self.W, self.U, self.b, self.Wrbm, self.Wuh, self.bh]
        if not self.finetune:
            self.trainable_weights.append(self.Wuv)
            self.trainable_weights.append(self.bv)

        self.built = True

    def reset_states(self):
        assert self.stateful, 'Layer must be stateful.'
        input_shape = self.input_spec[0].shape
        if not input_shape[0]:
            raise Exception('If a RNN is stateful, a complete '
                            'input_shape must be provided (including batch size).')
        if hasattr(self, 'states'):
            K.set_value(self.states[0], np.zeros((input_shape[0], self.hidden_recurrent_dim)))
            K.set_value(self.states[1], np.zeros((input_shape[0], self.input_dim)))
            K.set_value(self.states[2], np.zeros((input_shape[0], self.hidden_dim)))
        else:
            self.states = [K.zeros((input_shape[0], self.hidden_recurrent_dim)),
                           K.zeros((input_shape[0], self.input_dim)),
                           K.zeros((input_shape[0], self.hidden_dim))]

    def preprocess_input(self, x):
        if self.consume_less == 'cpu':
            input_shape = K.int_shape(x)
            input_dim = input_shape[2]
            timesteps = input_shape[1]
            return time_distributed_dense(x, self.W, self.b, self.dropout_W,
                                          input_dim, self.hidden_recurrent_dim, timesteps)
        else:
            return x

    def step(self, x, states):
        u_tm1 = states[0]
        B_U = states[3]
        B_W = states[4]

        # dynamic RBM biases conditioned on the previous recurrent state
        bv_t = self.bv + K.dot(u_tm1, self.Wuv)
        bh_t = self.bh + K.dot(u_tm1, self.Wuh)

        if self.consume_less == 'cpu':
            h = x
        else:
            h = self.b + K.dot(x * B_W, self.W)
        u_t = self.activation(h + K.dot(u_tm1 * B_U, self.U))

        return x, [u_t, bv_t, bh_t]

    def get_constants(self, x):
        constants = []
        if 0 < self.dropout_U < 1:
            ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
            ones = K.tile(ones, (1, self.hidden_recurrent_dim))
            B_U = K.in_train_phase(K.dropout(ones, self.dropout_U), ones)
            constants.append(B_U)
        else:
            constants.append(K.cast_to_floatx(1.))

        if self.consume_less == 'cpu' and 0 < self.dropout_W < 1:
            input_shape = self.input_spec[0].shape
            input_dim = input_shape[-1]
            ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
            ones = K.tile(ones, (1, input_dim))
            B_W = K.in_train_phase(K.dropout(ones, self.dropout_W), ones)
            constants.append(B_W)
        else:
            constants.append(K.cast_to_floatx(1.))

        return constants

    def get_initial_states(self, x):
        print("initial state building")
        # build an all-zero tensor of shape (samples, 1)
        initial_state = K.zeros_like(x)  # (samples, timesteps, input_dim)
        initial_state = K.sum(initial_state, axis=(1, 2))  # (samples,)
        initial_state = K.expand_dims(initial_state)  # (samples, 1)

        # tile it out to one zero state per state dimension
        initial_states = []
        for dim in self.states_dim:
            initial_states.append(K.tile(initial_state, [1, dim]))  # (samples, dim)
        #initial_states = [initial_state for _ in range(len(self.states))]
        return initial_states

    def call(self, x, mask=None):
        input_shape = self.input_spec[0].shape
        if self.unroll and input_shape[1] is None:
            raise ValueError('Cannot unroll a RNN if the '
                             'time dimension is undefined. \n'
                             '- If using a Sequential model, '
                             'specify the time dimension by passing '
                             'an `input_shape` or `batch_input_shape` '
                             'argument to your first layer. If your '
                             'first layer is an Embedding, you can '
                             'also use the `input_length` argument.\n'
                             '- If using the functional API, specify '
                             'the time dimension by passing a `shape` '
                             'or `batch_shape` argument to your Input layer.')

        if self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_states(x)
        constants = self.get_constants(x)
        preprocessed_input = self.preprocess_input(x)

        last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                             initial_states,
                                             go_backwards=self.go_backwards,
                                             mask=mask, constants=constants,
                                             unroll=self.unroll,
                                             input_length=input_shape[1])
        if self.stateful:
            updates = []
            for i in range(len(states)):
                updates.append((self.states[i], states[i]))

        u_t = states[0]
        bv_t = states[1]
        bh_t = states[2]

        if not self.finetune:
            # during pretraining, wrap the dynamic biases in an RBM so its CD loss
            # and reconstruction metric can be attached to the training model
            self.rbm_rnn = RBM(self.hidden_dim, init=glorot_uniform_sigm,
                               input_dim=self.input_dim,
                               hidden_unit_type='binary',
                               visible_unit_type='gaussian',
                               persistent=self.persistent,
                               batch_size=self.batch_input_shape[0],
                               nb_gibbs_steps=self.nb_gibbs_steps,
                               Wrbm=self.Wrbm, bx=bv_t, bh=bh_t,
                               dropout=self.dropout_RBM)
            self.rbm_rnn.build([input_shape[0], self.input_dim])
            self.loss = self.rbm_rnn.contrastive_divergence_loss
            self.metrics = self.rbm_rnn.reconstruction_loss

        x = K.reshape(x, (-1, self.input_dim))
        if not self.finetune:
            return x
        else:
            #return K.sigmoid(K.dot(x, self.Wrbm) + bh_t)
            return K.dot(x, self.Wrbm) + bh_t
        #return last_output

    def set_finetune(self):
        # switch from pretraining to fine-tuning: mark the layer unbuilt so it can
        # be re-added to a new model while keeping the weights learned so far
        self.finetune = True
        self.built = False
        self.inbound_nodes = []

    def get_config(self):
        config = {'hidden_dim': self.hidden_dim,
                  'hidden_recurrent_dim': self.hidden_recurrent_dim,
                  'init': self.init.__name__,
                  'inner_init': self.inner_init.__name__,
                  'activation': self.activation.__name__,
                  'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
                  'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
                  'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
                  'Wrbm_regularizer': self.Wrbm_regularizer.get_config() if self.Wrbm_regularizer else None,
                  'dropout_W': self.dropout_W,
                  'dropout_U': self.dropout_U,
                  'dropout_RBM': self.dropout_RBM,
                  'nb_gibbs_steps': self.nb_gibbs_steps,
                  'persistent': self.persistent,
                  'finetune': self.finetune}
        base_config = super(RNNRBM, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
def main():
    # generate dummy dataset
    nframes = 10000
    dataset = np.random.normal(loc=np.zeros(input_dim), scale=np.ones(input_dim),
                               size=(nframes, input_dim))

    # standardize (in this case superfluous)
    #dataset, mean, stddev = standardize(dataset)

    # split into train and test portion
    ntest = 1000
    X_train = dataset[:-ntest, :]    # all but last 1000 samples for training
    X_test = dataset[-ntest:, :]     # last 1000 samples for testing
    X_trainsub = dataset[:ntest, :]  # subset of training data with same number of samples as testset
    assert X_train.shape[0] >= X_test.shape[0], 'Train set should be at least size of test set!'

    # setup model structure
    print('Creating training model...')
    dbn = DBN([
        GBRBM(input_dim, 200, init=glorot_uniform_sigm),
        RBM(200, 400, init=glorot_uniform_sigm),
        RBM(400, 300, init=glorot_uniform_sigm),
        RBM(300, 50, init=glorot_uniform_sigm),
        RBM(50, hidden_dim, init=glorot_uniform_sigm)
    ])

    # setup optimizer, loss
    def get_layer_loss(rbm, layer_no):
        return rbm.contrastive_divergence_loss(nb_gibbs_steps=1)

    def get_layer_optimizer(layer_no):
        return SGD((layer_no + 1) * lr, 0., decay=0.0, nesterov=False)

    dbn.compile(layer_optimizer=get_layer_optimizer, layer_loss=get_layer_loss)

    # do training
    print('Training...')
    begin_time = time.time()
    #callbacks = [momentum_scheduler, rec_err_logger, free_energy_gap_logger]
    dbn.fit(X_train, batch_size, nb_epoch, verbose=1, shuffle=False)
    end_time = time.time()
    print('Training took %f minutes' % ((end_time - begin_time) / 60.0))

    # save model parameters
    print('Saving model...')
    dbn.save_weights('example.hdf5', overwrite=True)

    # load model parameters
    print('Loading model...')
    dbn.load_weights('example.hdf5')

    # generate hidden features from input data
    print('Creating inference model...')
    F = dbn.get_forward_inference_layers()
    B = dbn.get_backward_inference_layers()
    inference_model = Sequential()
    for f in F:
        inference_model.add(f)
        inference_model.add(SampleBernoulli(mode='random'))
    for b in B[:-1]:
        inference_model.add(b)
        inference_model.add(SampleBernoulli(mode='random'))
    # last layer is a gaussian layer
    inference_model.add(B[-1])

    print('Compiling Theano graph...')
    opt = SGD()
    inference_model.compile(opt, loss='mean_squared_error')  # XXX: optimizer and loss are not used!

    print('Doing inference...')
    h = inference_model.predict(dataset)
    print(h)

    print('Done!')