# Excerpt whose original line breaks were lost. In order it contains:
#   1. the tail of a decoder step: decoder_layer.forward_prop updates
#      hiddens[state] / hiddens[output], output_layer.forward_prop gives
#      INIT_PRED, and [pred, INIT_PRED] + hiddens is returned as one list;
#   2. calc_cost(self, pred, Y): mean categorical cross-entropy of pred vs Y;
#   3. load-or-create of the 'encoder_decoder' network;
#   4. the start of outputs_info: one dict(initial=e, taps=[-1]) per entry of
#      ENCODER_HIDDENS.
# NOTE(review): the line opens with `#`, so as stored Python reads all of it
# as a single comment; the code only exists once the newlines are restored.
# NOTE(review): the bare `except:` also catches KeyboardInterrupt/SystemExit,
# so any failure in utils.load_net silently builds a new network -- consider
# `except Exception:`.
# NOTE(review): T.argmax is called without an axis -- confirm that is intended
# if INIT_PRED carries a batch dimension.
# Forward Propagate hiddens[state],hiddens[output] = decoder_layer.forward_prop(INIT_PRED,hiddens[state],hiddens[output]) # Get prediction INIT_PRED= self.output_layer.forward_prop(hiddens[output]) pred = T.cast(T.argmax(INIT_PRED),theano.config.floatX) # Put all returns into a list so the scan function # doesn't have to decompile multiple lists return_list = [pred,INIT_PRED] + hiddens return return_list def calc_cost(self,pred,Y): return T.mean(T.nnet.categorical_crossentropy(pred,Y)) try: rnn = utils.load_net('encoder_decoder') except: rnn = RNN(wh.vocab_size,embed_size,encoder_nodes,decoder_nodes,batch_size) print("created new network") params = rnn.update_params memory_params = rnn.memory_params # Generate the outputs_info # This instructs Theano how to update variables during a scan. # WARNING: Things are about to get SUPER ugly up in here. outputs_info = [] ENCODER_HIDDENS,DECODER_HIDDENS = unravelHiddens(HIDDEN_STATES) for e in ENCODER_HIDDENS: outputs_info.append(dict(initial=e, taps=[-1]))
# Excerpt whose original line breaks were lost. In order it contains:
#   1. genHiddens(self, batch_size, layer): returns two zero-filled float32
#      arrays, each of shape (batch_size, layer.y);
#   2. calc_cost(self, X, Y, S1, H1, S2, H2, S3, H3): X goes through
#      input_layer, then hidden_layer_1..3 (each forward_prop takes the layer
#      input plus that layer's S/H pair and returns the new pair), then
#      output_layer on H3; returns (cost, pred, S1, H1, S2, H2, S3, H3) where
#      cost is the mean categorical cross-entropy of pred vs Y;
#   3. load-or-create of the 'rap' network;
#   4. outputs_info: two None entries followed by dict(initial=..., taps=[-1])
#      for S1, H1, S2, H2, S3, H3 -- the same order as calc_cost's return tuple.
# NOTE(review): the excerpt stops before the closing `]` of outputs_info.
# NOTE(review): the bare `except:` also catches KeyboardInterrupt/SystemExit,
# so any failure in utils.load_net silently builds a new network -- consider
# `except Exception:`.
def genHiddens(self, batch_size, layer): return np.zeros((batch_size, layer.y), dtype='float32'), np.zeros( (batch_size, layer.y), dtype='float32') def calc_cost(self, X, Y, S1, H1, S2, H2, S3, H3): e = self.input_layer.forward_prop(X) S1, H1 = self.hidden_layer_1.forward_prop(e, S1, H1) S2, H2 = self.hidden_layer_2.forward_prop(H1, S2, H2) S3, H3 = self.hidden_layer_3.forward_prop(H2, S3, H3) pred = self.output_layer.forward_prop(H3) cost = T.nnet.categorical_crossentropy(pred, Y).mean() return cost, pred, S1, H1, S2, H2, S3, H3 try: rnn = utils.load_net('rap') except: rnn = RNN(wh.vocab_size, embed_size, nodes, batch_size) print("created new network") params = rnn.update_params memory_params = rnn.memory_params outputs_info = [ None, None, dict(initial=S1, taps=[-1]), dict(initial=H1, taps=[-1]), dict(initial=S2, taps=[-1]), dict(initial=H2, taps=[-1]), dict(initial=S3, taps=[-1]), dict(initial=H3, taps=[-1])
def genHiddens(self, batch_size, layer):
    """Return two zero-filled float32 arrays of shape (batch_size, layer.y).

    NOTE(review): takes ``self``, so this is presumably a method of the RNN
    class instantiated below; the class header is not part of this excerpt.
    """
    return np.zeros((batch_size, layer.y), dtype='float32'), np.zeros(
        (batch_size, layer.y), dtype='float32')


def calc_cost(self, X, Y, S1, H1, S2, H2):
    """Run one step through both hidden layers and score the prediction.

    X goes through ``input_layer``, then ``hidden_layer_1`` and
    ``hidden_layer_2`` (each ``forward_prop`` receives the layer input plus
    that layer's S/H pair and returns the new pair); ``output_layer`` maps
    H2 to ``pred``.

    Returns:
        (cost, pred, S1, H1, S2, H2), where cost is the mean categorical
        cross-entropy of pred against Y and the S/H values are the updated
        ones.
    """
    e = self.input_layer.forward_prop(X)
    S1, H1 = self.hidden_layer_1.forward_prop(e, S1, H1)
    S2, H2 = self.hidden_layer_2.forward_prop(H1, S2, H2)
    pred = self.output_layer.forward_prop(H2)
    cost = T.nnet.categorical_crossentropy(pred, Y).mean()
    return cost, pred, S1, H1, S2, H2


# Reuse a saved network when one can be loaded, otherwise start fresh.
# `except Exception` (rather than a bare `except:`) keeps Ctrl-C and
# SystemExit from being mistaken for "no saved network".
try:
    rnn = utils.load_net('lor')
    print('loaded previous network')
except Exception:
    rnn = RNN(wh.vocab_size, embed_size, nodes, batch_size)
    print("created new network")

params = rnn.update_params
memory_params = rnn.memory_params

# Entries line up with calc_cost's return tuple: cost and pred get None,
# while S1/H1/S2/H2 are seeded with initial values and tap the previous step.
# Presumably handed to theano.scan; that call is not part of this excerpt.
outputs_info = [
    None,
    None,
    dict(initial=S1, taps=[-1]),
    dict(initial=H1, taps=[-1]),
    dict(initial=S2, taps=[-1]),
    dict(initial=H2, taps=[-1]),
]
# Excerpt whose original line breaks were lost. In order it contains:
#   1. the tail of an update builder (looks like the body of the `Adagrad`
#      method called further along -- confirm): gradients of cost w.r.t.
#      params are clipped to [-5, 5], each squared gradient is added to its
#      memory variable m, and p becomes p - lr * g / sqrt(new_m + 1e-8);
#      both (m, new_m) and the parameter update are appended to `updates`;
#   2. load-or-create of the 'explain' network (ExplainNetwork([5,10,10,1]));
#   3. the compiled Theano functions: back_prop takes [X, Y], outputs
#      [cost, pred, h2, h] and applies `updates`; predict takes [X], outputs
#      [y_pred, ph2, ph] and applies no updates.
# NOTE(review): the line opens with `#`, so as stored Python reads all of it
# as a single comment; the code only exists once the newlines are restored.
# NOTE(review): the bare `except:` also catches KeyboardInterrupt/SystemExit,
# so any failure in utils.load_net silently builds a new network -- consider
# `except Exception:`.
# I don't have to throw out my previous # calculation for gradients of a 1-layer # model and start all over. grads = T.grad(cost=cost, wrt=params) updates = [] for p,g,m in zip(params, grads, mem): g = T.clip(g,-5.,5) new_m = m + (g * g) # Here's where the update list mentioned in # init comes into play. updates.append((m,new_m)) updates.append((p, p - ((lr * g) / T.sqrt(new_m + 1e-8)))) return updates try: nn = utils.load_net('explain') except: nn = ExplainNetwork([5,10,10,1]) print("created new network") ############################################# BEGIN THEANO FUNCTION DEFINITIONS ################################### params = nn.update_params memory_params = nn.memory_params cost,pred,h2,h = nn.calc_cost(X,Y) y_pred,ph2,ph = nn.predict(X) updates = nn.Adagrad(cost,params,memory_params) back_prop = theano.function(inputs=[X,Y], outputs=[cost,pred,h2,h], updates=updates, allow_input_downcast=True) predict = theano.function(inputs=[X], outputs=[y_pred,ph2,ph], updates=None, allow_input_downcast=True) ##############################################################################################################
# Excerpt whose original line breaks were lost. In order it contains:
#   1. the tail of an update builder (looks like the body of the `Adagrad`
#      method called further along -- confirm): gradients of cost w.r.t.
#      params are clipped to [-5, 5], each squared gradient is added to its
#      memory variable m, and p becomes p - lr * g / sqrt(new_m + 1e-8);
#      both (m, new_m) and the parameter update are appended to `updates`;
#   2. load-or-create of the 'explain' network (ExplainNetwork([5, 10, 10, 1]));
#   3. the compiled Theano function back_prop: takes [X, Y], outputs
#      [cost, pred, h2, h] and applies `updates`.
# NOTE(review): the line opens with `#`, so as stored Python reads all of it
# as a single comment; the code only exists once the newlines are restored.
# NOTE(review): the bare `except:` also catches KeyboardInterrupt/SystemExit,
# so any failure in utils.load_net silently builds a new network -- consider
# `except Exception:`.
# calculation for gradients of a 1-layer # model and start all over. grads = T.grad(cost=cost, wrt=params) updates = [] for p, g, m in zip(params, grads, mem): g = T.clip(g, -5., 5) new_m = m + (g * g) # Here's where the update list mentioned in # init comes into play. updates.append((m, new_m)) updates.append((p, p - ((lr * g) / T.sqrt(new_m + 1e-8)))) return updates try: nn = utils.load_net('explain') except: nn = ExplainNetwork([5, 10, 10, 1]) print("created new network") ############################################# BEGIN THEANO FUNCTION DEFINITIONS ################################### params = nn.update_params memory_params = nn.memory_params cost, pred, h2, h = nn.calc_cost(X, Y) y_pred, ph2, ph = nn.predict(X) updates = nn.Adagrad(cost, params, memory_params) back_prop = theano.function(inputs=[X, Y], outputs=[cost, pred, h2, h], updates=updates, allow_input_downcast=True)
# Excerpt whose original line breaks were lost. In order it contains:
#   1. the tail of a decoder step: decoder_layer.forward_prop updates
#      hiddens[state] / hiddens[output], output_layer.forward_prop gives
#      INIT_PRED, and [pred, INIT_PRED] + hiddens is returned as one list;
#   2. calc_cost(self, pred, Y): mean categorical cross-entropy of pred vs Y;
#   3. load-or-create of the 'encoder_decoder' network;
#   4. the start of outputs_info: one dict(initial=e, taps=[-1]) per entry of
#      ENCODER_HIDDENS.
# NOTE(review): as stored, only the first assignment is live code; everything
# from the first `#` onward is read by Python as a comment until the newlines
# are restored.
# NOTE(review): the bare `except:` also catches KeyboardInterrupt/SystemExit,
# so any failure in utils.load_net silently builds a new network -- consider
# `except Exception:`.
# NOTE(review): T.argmax is called without an axis -- confirm that is intended
# if INIT_PRED carries a batch dimension.
hiddens[state], hiddens[output] = decoder_layer.forward_prop( INIT_PRED, hiddens[state], hiddens[output]) # Get prediction INIT_PRED = self.output_layer.forward_prop(hiddens[output]) pred = T.cast(T.argmax(INIT_PRED), theano.config.floatX) # Put all returns into a list so the scan function # doesn't have to decompile multiple lists return_list = [pred, INIT_PRED] + hiddens return return_list def calc_cost(self, pred, Y): return T.mean(T.nnet.categorical_crossentropy(pred, Y)) try: rnn = utils.load_net('encoder_decoder') except: rnn = RNN(wh.vocab_size, embed_size, encoder_nodes, decoder_nodes, batch_size) print("created new network") params = rnn.update_params memory_params = rnn.memory_params # Generate the outputs_info # This instructs Theano how to update variables during a scan. # WARNING: Things are about to get SUPER ugly up in here. outputs_info = [] ENCODER_HIDDENS, DECODER_HIDDENS = unravelHiddens(HIDDEN_STATES) for e in ENCODER_HIDDENS: outputs_info.append(dict(initial=e, taps=[-1]))
# Excerpt whose original line breaks were lost. In order it contains:
#   1. the tail of a forward step: hidden_layer_2 and hidden_layer_3 update
#      their S/H pairs, output_layer maps H3 to pred, INIT_PRED is the argmax
#      of pred cast to floatX, and (pred, INIT_PRED, S1, H1, S2, H2, S3, H3)
#      is returned;
#   2. calc_cost(self, pred, Y): mean categorical cross-entropy of pred vs Y;
#   3. nodes = [128, 256, 128] and load-or-create of the 'word_reverser' network;
#   4. outputs_info: dict(initial=..., taps=[-1]) for S1, H1, S2, H2, S3, H3;
#   5. the start of a theano.scan call with fn=rnn.set_hiddens over X_LIST.
# NOTE(review): the excerpt stops inside the theano.scan(...) call.
# NOTE(review): everything after the first `#` is read by Python as a comment
# until the newlines are restored.
# NOTE(review): the bare `except:` also catches KeyboardInterrupt/SystemExit,
# so any failure in utils.load_net silently builds a new network -- consider
# `except Exception:`.
# NOTE(review): T.argmax is called without an axis -- confirm that is intended
# if pred carries a batch dimension.
S2,H2 = self.hidden_layer_2.forward_prop(H1,S2,H2) S3,H3 = self.hidden_layer_3.forward_prop(H2,S3,H3) pred = self.output_layer.forward_prop(H3) INIT_PRED = T.cast(T.argmax(pred),theano.config.floatX) # argmax returns an int, we need to keep everything floatX return pred,INIT_PRED,S1,H1,S2,H2,S3,H3 def calc_cost(self,pred,Y): return T.mean(T.nnet.categorical_crossentropy(pred,Y)) #nodes = [512,512,512] nodes = [128,256,128] #nodes = [100,100,100] try: rnn = utils.load_net('word_reverser') except: rnn = RNN(wh.vocab_size,embed_size,nodes,batch_size) print("created new network") params = rnn.update_params memory_params = rnn.memory_params outputs_info=[dict(initial=S1, taps=[-1]),dict(initial=H1, taps=[-1]), dict(initial=S2, taps=[-1]),dict(initial=H2, taps=[-1]), dict(initial=S3, taps=[-1]),dict(initial=H3, taps=[-1]) ] # The f_ stands for forward_prop f_states1,f_outputs1,f_states2,f_outputs2,f_states3,f_outputs3 = theano.scan(fn=rnn.set_hiddens, outputs_info=outputs_info, sequences=[X_LIST]
# Excerpt whose original line breaks were lost. In order it contains:
#   1. the tail of a forward step: hidden_layer_2 and hidden_layer_3 update
#      their S/H pairs, output_layer maps H3 to pred, INIT_PRED is the argmax
#      of pred cast to floatX, and (pred, INIT_PRED, S1, H1, S2, H2, S3, H3)
#      is returned;
#   2. calc_cost(self, pred, Y): mean categorical cross-entropy of pred vs Y;
#   3. nodes = [128, 256, 128] and load-or-create of the 'udacity' network;
#   4. outputs_info: dict(initial=..., taps=[-1]) for S1, H1, S2, H2, S3, H3;
#   5. the start of a theano.scan call with fn=rnn.set_hiddens over X_LIST.
# NOTE(review): the excerpt stops inside the theano.scan(...) call.
# NOTE(review): everything after the first `#` is read by Python as a comment
# until the newlines are restored.
# NOTE(review): the bare `except:` also catches KeyboardInterrupt/SystemExit,
# so any failure in utils.load_net silently builds a new network -- consider
# `except Exception:`.
# NOTE(review): T.argmax is called without an axis -- confirm that is intended
# if pred carries a batch dimension.
S2,H2 = self.hidden_layer_2.forward_prop(H1,S2,H2) S3,H3 = self.hidden_layer_3.forward_prop(H2,S3,H3) pred = self.output_layer.forward_prop(H3) INIT_PRED = T.cast(T.argmax(pred),theano.config.floatX) # argmax returns an int, we need to keep everything floatX return pred,INIT_PRED,S1,H1,S2,H2,S3,H3 def calc_cost(self,pred,Y): return T.mean(T.nnet.categorical_crossentropy(pred,Y)) #nodes = [512,512,512] nodes = [128,256,128] #nodes = [100,100,100] try: rnn = utils.load_net('udacity') except: rnn = RNN(wh.vocab_size,embed_size,nodes,batch_size) print("created new network") params = rnn.update_params memory_params = rnn.memory_params outputs_info=[dict(initial=S1, taps=[-1]),dict(initial=H1, taps=[-1]), dict(initial=S2, taps=[-1]),dict(initial=H2, taps=[-1]), dict(initial=S3, taps=[-1]),dict(initial=H3, taps=[-1]) ] # The f_ stands for forward_prop f_states1,f_outputs1,f_states2,f_outputs2,f_states3,f_outputs3 = theano.scan(fn=rnn.set_hiddens, outputs_info=outputs_info, sequences=[X_LIST]
# Excerpt whose original line breaks were lost. In order it contains:
#   1. the last statement of another method, which extends self.memory_params
#      with output_layer.memory_params;
#   2. genHiddens(self, batch_size, layer): returns two zero-filled float32
#      arrays, each of shape (batch_size, layer.y);
#   3. calc_cost(self, X, Y, S1, H1, S2, H2, S3, H3): X goes through
#      input_layer, then hidden_layer_1..3 (each forward_prop takes the layer
#      input plus that layer's S/H pair and returns the new pair), then
#      output_layer on H3; returns (cost, pred, S1, H1, S2, H2, S3, H3) where
#      cost is the mean categorical cross-entropy of pred vs Y;
#   4. load-or-create of the 'rap' network;
#   5. outputs_info: two None entries followed by dict(initial=..., taps=[-1])
#      for S1, H1, S2, H2, S3, H3 -- the same order as calc_cost's return tuple;
#   6. the start of a theano.scan call with fn=rnn.calc_cost over
#      [X_LIST, Y_LIST].
# NOTE(review): the excerpt stops inside the theano.scan(...) call.
# NOTE(review): the bare `except:` also catches KeyboardInterrupt/SystemExit,
# so any failure in utils.load_net silently builds a new network -- consider
# `except Exception:`.
self.memory_params += self.output_layer.memory_params def genHiddens(self,batch_size,layer): return np.zeros((batch_size,layer.y),dtype='float32'),np.zeros((batch_size,layer.y),dtype='float32') def calc_cost(self,X,Y,S1,H1,S2,H2,S3,H3): e = self.input_layer.forward_prop(X) S1,H1 = self.hidden_layer_1.forward_prop(e,S1,H1) S2,H2 = self.hidden_layer_2.forward_prop(H1,S2,H2) S3,H3 = self.hidden_layer_3.forward_prop(H2,S3,H3) pred = self.output_layer.forward_prop(H3) cost = T.nnet.categorical_crossentropy(pred,Y).mean() return cost,pred,S1,H1,S2,H2,S3,H3 try: rnn = utils.load_net('rap') except: rnn = RNN(wh.vocab_size,embed_size,nodes,batch_size) print("created new network") params = rnn.update_params memory_params = rnn.memory_params outputs_info=[None,None,dict(initial=S1, taps=[-1]),dict(initial=H1, taps=[-1]), dict(initial=S2, taps=[-1]),dict(initial=H2, taps=[-1]), dict(initial=S3, taps=[-1]),dict(initial=H3, taps=[-1]) ] scan_costs,y_preds,states1,outputs1,states2,outputs2,states3,outputs3 = theano.scan(fn=rnn.calc_cost, outputs_info=outputs_info, sequences=[X_LIST,Y_LIST]