def __init__(self):
    """Wire an RNN to a GSN so the RNN supplies the GSN hiddens per timestep.

    A template GSN is built once to obtain its constructor arguments and
    its parameters.  ``theano.scan`` then walks the RNN output and the RNN
    input in lockstep, building a GSN for each step that is hooked to that
    step's input and hiddens and that reuses the template's parameters.

    Attributes set on ``self``:
        rnn: the RNN producing the GSN hiddens.
        outputs: the per-step GSN outputs collected by scan.
        updates: the RNN updates merged with the scan updates.
        cost: the sum of the per-step GSN training costs.
        params: the GSN parameters followed by the RNN parameters.
    """
    super(RNN_GSN, self).__init__()
    gsn_hiddens = 500
    gsn_layers = 2
    input_size = 28 * 28

    # The RNN needs to output hidden units for the odd layers of the GSN.
    # math.ceil returns a float on Python 2, so cast to keep the layer size
    # an integer on every interpreter version.
    n_odd_layers = int(math.ceil(gsn_layers / 2.))

    # RNN that takes in images (3D sequences) and outputs gsn hiddens
    # (3D sequence of them)
    self.rnn = RNN(
        input_size=input_size,
        hidden_size=100,
        output_size=gsn_hiddens * n_odd_layers,
        layers=1,
        activation='tanh',
        hidden_activation='relu',
        weights_init='uniform',
        weights_interval='montreal',
        r_weights_init='identity'
    )

    # Create the GSN that will encode the input space.  It serves only as a
    # template: its arguments and parameters are reused by the per-step GSNs.
    template_gsn = GSN(
        input_size=input_size,
        hidden_size=gsn_hiddens,
        layers=gsn_layers,
        walkbacks=4,
        visible_activation='sigmoid',
        hidden_activation='tanh',
        image_height=28,
        image_width=28
    )
    # grab the input arguments
    gsn_args = template_gsn.args.copy()
    # grab the parameters it initialized
    gsn_params = template_gsn.get_params()

    # Now hook the two up! RNN should output hiddens for GSN into a 3D tensor
    # (1 set for each timestep). Therefore, we need to use scan to create the
    # GSN reconstruction for each timestep given the hiddens.
    def step(hiddens, x):
        """Build the GSN for one timestep; return its outputs and cost."""
        step_gsn = GSN(
            inputs_hook=(input_size, x),
            hiddens_hook=(gsn_hiddens, hiddens),
            params_hook=gsn_params,
            **gsn_args
        )
        return step_gsn.get_outputs(), step_gsn.get_train_cost()

    (outputs, costs), scan_updates = theano.scan(
        fn=step,
        sequences=[self.rnn.output, self.rnn.input],
        outputs_info=[None, None]
    )

    self.outputs = outputs

    # RNN updates first, then scan updates, so scan entries win on key clashes.
    self.updates = dict()
    self.updates.update(self.rnn.get_updates())
    self.updates.update(scan_updates)

    self.cost = costs.sum()
    self.params = gsn_params + self.rnn.get_params()
def main():
    """Train a two-layer RNN on the Shakespeare text dataset using RMSProp.

    Builds the dataset, the model, a cost monitor reported on the validation
    and test splits, and the optimizer, then starts training.
    """
    corpus = TextDataset(
        path='../../../../datasets/shakespeare_input.txt',
        source="http://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt",
        target_n_future=1,
        sequence_length=50
    )

    # Input and output layers are both sized by the dataset vocabulary.
    vocab_size = len(corpus.vocab)
    random_streams = RNG_MRG.MRG_RandomStreams(1)

    model = RNN(
        outdir='outputs/rnn/',
        # architecture
        input_size=vocab_size,
        hidden_size=128,
        output_size=vocab_size,
        layers=2,
        activation='softmax',
        hidden_activation='relu',
        # initialization
        mrg=random_streams,
        weights_init='uniform',
        weights_interval='montreal',
        bias_init=0.0,
        r_weights_init='identity',
        r_bias_init=0.0,
        # cost
        cost_function='nll',
        cost_args=None,
        # noise
        noise='dropout',
        noise_level=.7,
        noise_decay='exponential',
        noise_decay_amount=.99,
        direction='forward'
    )

    train_cost = model.get_train_cost()
    monitor = Monitor("cost", train_cost, train=False, valid=True, test=True)

    trainer = RMSProp(
        model=model,
        dataset=corpus,
        grad_clip=5.,
        hard_clip=False,
        learning_rate=2e-3,
        lr_decay='exponential',
        lr_decay_factor=0.97,
        decay=0.95,
        batch_size=50,
        epochs=50
    )
    trainer.train(monitor_channels=monitor)
def main():
    """Fit an RNN to the Shakespeare text dataset and monitor its cost.

    The steps are: load the text dataset, construct a two-layer RNN whose
    input and output sizes equal the vocabulary size, attach a cost monitor
    for the validation and test splits, and train with RMSProp.
    """
    text_data = TextDataset(
        path='../../../../datasets/shakespeare_input.txt',
        source="http://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt",
        target_n_future=1,
        sequence_length=50
    )

    # The vocabulary size sets both the input and the output dimension.
    n_symbols = len(text_data.vocab)
    streams = RNG_MRG.MRG_RandomStreams(1)

    network = RNN(
        outdir='outputs/rnn/',
        input_size=n_symbols,
        hidden_size=128,
        output_size=n_symbols,
        layers=2,
        activation='softmax',
        hidden_activation='relu',
        mrg=streams,
        weights_init='uniform',
        weights_interval='montreal',
        bias_init=0.0,
        r_weights_init='identity',
        r_bias_init=0.0,
        cost_function='nll',
        cost_args=None,
        noise='dropout',
        noise_level=.7,
        noise_decay='exponential',
        noise_decay_amount=.99,
        direction='forward'
    )

    # Report the training cost expression on validation and test data only.
    cost_channel = Monitor(
        "cost",
        network.get_train_cost(),
        train=False,
        valid=True,
        test=True
    )

    rmsprop = RMSProp(
        model=network,
        dataset=text_data,
        grad_clip=5.,
        hard_clip=False,
        learning_rate=2e-3,
        lr_decay='exponential',
        lr_decay_factor=0.97,
        decay=0.95,
        batch_size=50,
        epochs=50
    )
    rmsprop.train(monitor_channels=cost_channel)
def __init__(self):
    """Construct the RNN-GSN: an RNN whose outputs act as GSN hiddens.

    One GSN is created up front purely to capture its constructor arguments
    and its parameters.  ``theano.scan`` then iterates over the RNN output
    and the RNN input together, creating for each step a GSN hooked to that
    step's input and hiddens and sharing the captured parameters.

    Attributes set on ``self``:
        rnn: the RNN producing the GSN hiddens.
        outputs: the per-step GSN outputs collected by scan.
        updates: the RNN updates merged with the scan updates.
        cost: the sum of the per-step GSN training costs.
        params: the GSN parameters followed by the RNN parameters.
    """
    super(RNN_GSN, self).__init__()
    gsn_hiddens = 500
    gsn_layers = 2
    visible_size = 28 * 28

    # The RNN needs to output hidden units for the odd layers of the GSN.
    # int() guards against math.ceil returning a float (as it does on
    # Python 2), which would make the RNN output size non-integral.
    odd_layer_count = int(math.ceil(gsn_layers / 2.))

    # RNN that takes in images (3D sequences) and outputs gsn hiddens
    # (3D sequence of them)
    self.rnn = RNN(
        input_size=visible_size,
        hidden_size=100,
        output_size=gsn_hiddens * odd_layer_count,
        layers=1,
        activation='tanh',
        hidden_activation='relu',
        weights_init='uniform',
        weights_interval='montreal',
        r_weights_init='identity'
    )

    # Create the GSN that will encode the input space.  Only its arguments
    # and parameters are used below; the per-step GSNs are built in `step`.
    base_gsn = GSN(
        input_size=visible_size,
        hidden_size=gsn_hiddens,
        layers=gsn_layers,
        walkbacks=4,
        visible_activation='sigmoid',
        hidden_activation='tanh',
        image_height=28,
        image_width=28
    )
    # grab the input arguments
    gsn_args = base_gsn.args.copy()
    # grab the parameters it initialized
    gsn_params = base_gsn.get_params()

    # Now hook the two up! RNN should output hiddens for GSN into a 3D tensor
    # (1 set for each timestep). Therefore, we need to use scan to create the
    # GSN reconstruction for each timestep given the hiddens.
    def step(hiddens, x):
        """Return the reconstruction and the cost of one timestep's GSN."""
        timestep_gsn = GSN(
            inputs_hook=(visible_size, x),
            hiddens_hook=(gsn_hiddens, hiddens),
            params_hook=gsn_params,
            **gsn_args
        )
        return timestep_gsn.get_outputs(), timestep_gsn.get_train_cost()

    (outputs, costs), scan_updates = theano.scan(
        fn=step,
        sequences=[self.rnn.output, self.rnn.input],
        outputs_info=[None, None]
    )

    self.outputs = outputs

    # Merge order matters on key clashes: scan updates are applied last.
    self.updates = dict()
    self.updates.update(self.rnn.get_updates())
    self.updates.update(scan_updates)

    self.cost = costs.sum()
    self.params = gsn_params + self.rnn.get_params()