# Forward Propagate
            hiddens[state],hiddens[output] = decoder_layer.forward_prop(INIT_PRED,hiddens[state],hiddens[output])
        # Get prediction
        INIT_PRED= self.output_layer.forward_prop(hiddens[output])
        pred = T.cast(T.argmax(INIT_PRED),theano.config.floatX)
        # Put all returns into a list so the scan function
        # doesn't have to decompile multiple lists
        return_list = [pred,INIT_PRED] + hiddens
        return return_list

    def calc_cost(self,pred,Y):
        """Return the mean categorical cross-entropy of ``pred`` against ``Y``.

        Both arguments are handed unchanged to
        ``T.nnet.categorical_crossentropy``; the result is then averaged with
        ``T.mean``.

        NOTE(review): presumably ``pred`` holds predicted distributions and
        ``Y`` the matching targets -- confirm against the caller.
        """
        cross_entropy = T.nnet.categorical_crossentropy(pred, Y)
        return T.mean(cross_entropy)
        

# Try to obtain the network from utils.load_net; if that raises, build a
# fresh RNN instead.
try:
    rnn = utils.load_net('encoder_decoder')
except Exception:
    # Not a bare ``except:``: KeyboardInterrupt and SystemExit must keep
    # propagating instead of being mistaken for "nothing to load".
    rnn = RNN(wh.vocab_size, embed_size, encoder_nodes, decoder_nodes, batch_size)
    print("created new network")

# Module-level aliases for the network's two parameter lists.
params = rnn.update_params
memory_params = rnn.memory_params

# Generate the outputs_info
#   This instructs Theano how to update variables during a scan.
#   WARNING: Things are about to get SUPER ugly up in here.
# Every encoder hidden variable gets one entry: it is its own initial value
# and only the previous step is tapped (taps=[-1]).
outputs_info = []
ENCODER_HIDDENS, DECODER_HIDDENS = unravelHiddens(HIDDEN_STATES)
for e in ENCODER_HIDDENS:
    outputs_info.append(dict(initial=e, taps=[-1]))
Beispiel #2
0
    def genHiddens(self, batch_size, layer):
        """Return two independent zero-filled ``float32`` arrays.

        Each array has shape ``(batch_size, layer.y)``.

        NOTE(review): presumably these are the initial state/output pair for
        ``layer`` -- confirm against the caller.
        """
        shape = (batch_size, layer.y)
        first = np.zeros(shape, dtype='float32')
        second = np.zeros(shape, dtype='float32')
        return first, second

    def calc_cost(self, X, Y, S1, H1, S2, H2, S3, H3):
        """Run one forward pass and return the cost with the new hiddens.

        ``X`` goes through ``input_layer`` and then through the three hidden
        layers in order. Each hidden layer receives the second value returned
        by the layer before it, together with its own ``S``/``H`` pair. The
        last hidden layer's second return value feeds ``output_layer``.

        Returns the tuple ``(cost, pred, S1, H1, S2, H2, S3, H3)``, where
        ``cost`` is the mean categorical cross-entropy of ``pred`` against
        ``Y`` and the ``S``/``H`` entries are the values returned by the
        hidden layers' ``forward_prop``.

        NOTE(review): the argument and return order looks designed for use
        as a ``theano.scan`` step function -- confirm at the call site.
        """
        embedded = self.input_layer.forward_prop(X)
        next_S1, next_H1 = self.hidden_layer_1.forward_prop(embedded, S1, H1)
        next_S2, next_H2 = self.hidden_layer_2.forward_prop(next_H1, S2, H2)
        next_S3, next_H3 = self.hidden_layer_3.forward_prop(next_H2, S3, H3)
        pred = self.output_layer.forward_prop(next_H3)
        cross_entropy = T.nnet.categorical_crossentropy(pred, Y)
        cost = cross_entropy.mean()
        return cost, pred, next_S1, next_H1, next_S2, next_H2, next_S3, next_H3


# Try to obtain the network from utils.load_net; if that raises, build a
# fresh RNN instead.
try:
    rnn = utils.load_net('rap')
except Exception:
    # Not a bare ``except:``: KeyboardInterrupt and SystemExit must keep
    # propagating instead of being mistaken for "nothing to load".
    rnn = RNN(wh.vocab_size, embed_size, nodes, batch_size)
    print("created new network")

# Module-level aliases for the network's two parameter lists.
params = rnn.update_params
memory_params = rnn.memory_params

outputs_info = [
    None, None,
    dict(initial=S1, taps=[-1]),
    dict(initial=H1, taps=[-1]),
    dict(initial=S2, taps=[-1]),
    dict(initial=H2, taps=[-1]),
    dict(initial=S3, taps=[-1]),
    dict(initial=H3, taps=[-1])
Beispiel #3
0
    def genHiddens(self, batch_size, layer):
        """Create a pair of separate all-zero ``float32`` arrays.

        Both arrays have shape ``(batch_size, layer.y)`` and do not share
        memory.

        NOTE(review): presumably the initial state/output pair for ``layer``
        -- confirm against the caller.
        """
        dims = (batch_size, layer.y)
        zeros_a = np.zeros(dims, dtype='float32')
        zeros_b = np.zeros(dims, dtype='float32')
        return zeros_a, zeros_b

    def calc_cost(self, X, Y, S1, H1, S2, H2):
        """Run one forward pass and return the cost with the new hiddens.

        ``X`` goes through ``input_layer`` and then through the two hidden
        layers in order. Each hidden layer receives the second value returned
        by the layer before it, together with its own ``S``/``H`` pair. The
        second hidden layer's second return value feeds ``output_layer``.

        Returns the tuple ``(cost, pred, S1, H1, S2, H2)``, where ``cost`` is
        the mean categorical cross-entropy of ``pred`` against ``Y`` and the
        ``S``/``H`` entries are the values returned by the hidden layers'
        ``forward_prop``.

        NOTE(review): the argument and return order looks designed for use
        as a ``theano.scan`` step function -- confirm at the call site.
        """
        embedded = self.input_layer.forward_prop(X)
        next_S1, next_H1 = self.hidden_layer_1.forward_prop(embedded, S1, H1)
        next_S2, next_H2 = self.hidden_layer_2.forward_prop(next_H1, S2, H2)
        pred = self.output_layer.forward_prop(next_H2)
        cross_entropy = T.nnet.categorical_crossentropy(pred, Y)
        cost = cross_entropy.mean()
        return cost, pred, next_S1, next_H1, next_S2, next_H2


# Try to obtain the network from utils.load_net; if that raises, build a
# fresh RNN instead.
try:
    rnn = utils.load_net('lor')
except Exception:
    # Not a bare ``except:``: KeyboardInterrupt and SystemExit must keep
    # propagating instead of being mistaken for "nothing to load".
    rnn = RNN(wh.vocab_size, embed_size, nodes, batch_size)
    print("created new network")
else:
    # Only the load itself is guarded; report success outside the try body.
    print('loaded previous network')

# Module-level aliases for the network's two parameter lists.
params = rnn.update_params
memory_params = rnn.memory_params

# Six entries, in the same order as the values returned by rnn.calc_cost
# (cost, pred, S1, H1, S2, H2). The two ``None`` entries mark outputs that
# are not fed back; the S/H entries start from the given initial variable
# and tap only the previous step (taps=[-1]).
# NOTE(review): presumably passed to theano.scan with rnn.calc_cost as the
# step function -- the call is not visible here.
outputs_info = [
    None,
    None,
    dict(initial=S1, taps=[-1]),
    dict(initial=H1, taps=[-1]),
    dict(initial=S2, taps=[-1]),
    dict(initial=H2, taps=[-1]),
]
Beispiel #4
0
        # I don't have to throw out my previous
        # calculation for gradients of a 1-layer
        # model and start all over.
        grads = T.grad(cost=cost, wrt=params)
        updates = []
        for p,g,m in zip(params, grads, mem):
            g = T.clip(g,-5.,5)
            new_m = m + (g * g)
            # Here's where the update list mentioned in
            # init comes into play.
            updates.append((m,new_m))
            updates.append((p, p - ((lr * g) / T.sqrt(new_m + 1e-8))))
        return updates

# Try to obtain the network from utils.load_net; if that raises, build a
# fresh ExplainNetwork instead.
try:
    nn = utils.load_net('explain')
except Exception:
    # Not a bare ``except:``: KeyboardInterrupt and SystemExit must keep
    # propagating instead of being mistaken for "nothing to load".
    nn = ExplainNetwork([5, 10, 10, 1])
    print("created new network")
############################################# BEGIN THEANO FUNCTION DEFINITIONS ###################################
# Module-level aliases for the network's two parameter lists.
params = nn.update_params
memory_params = nn.memory_params

# Symbolic outputs of the training graph and of the prediction graph.
cost, pred, h2, h = nn.calc_cost(X, Y)
y_pred, ph2, ph = nn.predict(X)

# Adagrad supplies the (shared variable, new value) pairs applied on every
# back_prop call.
updates = nn.Adagrad(cost, params, memory_params)
# back_prop: takes X and Y, returns [cost, pred, h2, h] and applies `updates`.
back_prop = theano.function(
    inputs=[X, Y],
    outputs=[cost, pred, h2, h],
    updates=updates,
    allow_input_downcast=True,
)
# predict: takes X only, returns [y_pred, ph2, ph]; no updates are applied.
predict = theano.function(
    inputs=[X],
    outputs=[y_pred, ph2, ph],
    updates=None,
    allow_input_downcast=True,
)
##############################################################################################################
Beispiel #5
0
        # calculation for gradients of a 1-layer
        # model and start all over.
        grads = T.grad(cost=cost, wrt=params)
        updates = []
        for p, g, m in zip(params, grads, mem):
            g = T.clip(g, -5., 5)
            new_m = m + (g * g)
            # Here's where the update list mentioned in
            # init comes into play.
            updates.append((m, new_m))
            updates.append((p, p - ((lr * g) / T.sqrt(new_m + 1e-8))))
        return updates


# Try to obtain the network from utils.load_net; if that raises, build a
# fresh ExplainNetwork instead.
try:
    nn = utils.load_net('explain')
except Exception:
    # Not a bare ``except:``: KeyboardInterrupt and SystemExit must keep
    # propagating instead of being mistaken for "nothing to load".
    nn = ExplainNetwork([5, 10, 10, 1])
    print("created new network")
############################################# BEGIN THEANO FUNCTION DEFINITIONS ###################################
# Module-level aliases for the network's two parameter lists.
params = nn.update_params
memory_params = nn.memory_params

# Symbolic outputs of the training graph and of the prediction graph.
cost, pred, h2, h = nn.calc_cost(X, Y)
y_pred, ph2, ph = nn.predict(X)

# Adagrad supplies the (shared variable, new value) pairs applied on every
# back_prop call.
updates = nn.Adagrad(cost, params, memory_params)
# back_prop: takes X and Y, returns [cost, pred, h2, h] and applies `updates`.
back_prop = theano.function(
    inputs=[X, Y],
    outputs=[cost, pred, h2, h],
    updates=updates,
    allow_input_downcast=True,
)
Beispiel #6
0
            hiddens[state], hiddens[output] = decoder_layer.forward_prop(
                INIT_PRED, hiddens[state], hiddens[output])
        # Get prediction
        INIT_PRED = self.output_layer.forward_prop(hiddens[output])
        pred = T.cast(T.argmax(INIT_PRED), theano.config.floatX)
        # Put all returns into a list so the scan function
        # doesn't have to decompile multiple lists
        return_list = [pred, INIT_PRED] + hiddens
        return return_list

    def calc_cost(self, pred, Y):
        """Return the mean categorical cross-entropy of ``pred`` against ``Y``.

        Both arguments are handed unchanged to
        ``T.nnet.categorical_crossentropy``; the result is then averaged with
        ``T.mean``.

        NOTE(review): presumably ``pred`` holds predicted distributions and
        ``Y`` the matching targets -- confirm against the caller.
        """
        cross_entropy = T.nnet.categorical_crossentropy(pred, Y)
        return T.mean(cross_entropy)


# Try to obtain the network from utils.load_net; if that raises, build a
# fresh RNN instead.
try:
    rnn = utils.load_net('encoder_decoder')
except Exception:
    # Not a bare ``except:``: KeyboardInterrupt and SystemExit must keep
    # propagating instead of being mistaken for "nothing to load".
    rnn = RNN(wh.vocab_size, embed_size, encoder_nodes, decoder_nodes,
              batch_size)
    print("created new network")

# Module-level aliases for the network's two parameter lists.
params = rnn.update_params
memory_params = rnn.memory_params

# Generate the outputs_info
#   This instructs Theano how to update variables during a scan.
#   WARNING: Things are about to get SUPER ugly up in here.
# Every encoder hidden variable gets one entry: it is its own initial value
# and only the previous step is tapped (taps=[-1]).
outputs_info = []
ENCODER_HIDDENS, DECODER_HIDDENS = unravelHiddens(HIDDEN_STATES)
for e in ENCODER_HIDDENS:
    outputs_info.append(dict(initial=e, taps=[-1]))
Beispiel #7
0
        S2,H2 = self.hidden_layer_2.forward_prop(H1,S2,H2)
        S3,H3 = self.hidden_layer_3.forward_prop(H2,S3,H3)
        pred = self.output_layer.forward_prop(H3)
        INIT_PRED = T.cast(T.argmax(pred),theano.config.floatX)  # argmax returns an int, we need to keep everything floatX
        return pred,INIT_PRED,S1,H1,S2,H2,S3,H3

    def calc_cost(self,pred,Y):
        """Compute the averaged categorical cross-entropy for ``pred`` and ``Y``.

        ``T.nnet.categorical_crossentropy(pred, Y)`` is evaluated first and
        its result is reduced with ``T.mean``.

        NOTE(review): presumably ``pred`` holds predicted distributions and
        ``Y`` the matching targets -- confirm against the caller.
        """
        losses = T.nnet.categorical_crossentropy(pred, Y)
        return T.mean(losses)


# Value handed to the RNN constructor below; the commented-out lines are
# alternative settings.
# NOTE(review): presumably one size per hidden layer -- confirm in RNN.
#nodes = [512,512,512]
nodes = [128, 256, 128]
#nodes = [100,100,100]

# Try to obtain the network from utils.load_net; if that raises, build a
# fresh RNN instead.
try:
    rnn = utils.load_net('word_reverser')
except Exception:
    # Not a bare ``except:``: KeyboardInterrupt and SystemExit must keep
    # propagating instead of being mistaken for "nothing to load".
    rnn = RNN(wh.vocab_size, embed_size, nodes, batch_size)
    print("created new network")

# Module-level aliases for the network's two parameter lists.
params = rnn.update_params
memory_params = rnn.memory_params

# One recurrent entry per S/H variable: each starts from the given initial
# variable and taps only the previous step (taps=[-1]).
outputs_info = [
    dict(initial=S1, taps=[-1]), dict(initial=H1, taps=[-1]),
    dict(initial=S2, taps=[-1]), dict(initial=H2, taps=[-1]),
    dict(initial=S3, taps=[-1]), dict(initial=H3, taps=[-1]),
]
# The f_ stands for forward_prop
f_states1,f_outputs1,f_states2,f_outputs2,f_states3,f_outputs3 = theano.scan(fn=rnn.set_hiddens,
                              outputs_info=outputs_info,
                              sequences=[X_LIST]
        S2,H2 = self.hidden_layer_2.forward_prop(H1,S2,H2)
        S3,H3 = self.hidden_layer_3.forward_prop(H2,S3,H3)
        pred = self.output_layer.forward_prop(H3)
        INIT_PRED = T.cast(T.argmax(pred),theano.config.floatX)  # argmax returns an int, we need to keep everything floatX
        return pred,INIT_PRED,S1,H1,S2,H2,S3,H3

    def calc_cost(self,pred,Y):
        """Compute the averaged categorical cross-entropy for ``pred`` and ``Y``.

        ``T.nnet.categorical_crossentropy(pred, Y)`` is evaluated first and
        its result is reduced with ``T.mean``.

        NOTE(review): presumably ``pred`` holds predicted distributions and
        ``Y`` the matching targets -- confirm against the caller.
        """
        losses = T.nnet.categorical_crossentropy(pred, Y)
        return T.mean(losses)
        
    
# Value handed to the RNN constructor below; the commented-out lines are
# alternative settings.
# NOTE(review): presumably one size per hidden layer -- confirm in RNN.
#nodes = [512,512,512]
nodes = [128, 256, 128]
#nodes = [100,100,100]

# Try to obtain the network from utils.load_net; if that raises, build a
# fresh RNN instead.
try:
    rnn = utils.load_net('udacity')
except Exception:
    # Not a bare ``except:``: KeyboardInterrupt and SystemExit must keep
    # propagating instead of being mistaken for "nothing to load".
    rnn = RNN(wh.vocab_size, embed_size, nodes, batch_size)
    print("created new network")

# Module-level aliases for the network's two parameter lists.
params = rnn.update_params
memory_params = rnn.memory_params

# One recurrent entry per S/H variable: each starts from the given initial
# variable and taps only the previous step (taps=[-1]).
outputs_info = [
    dict(initial=S1, taps=[-1]), dict(initial=H1, taps=[-1]),
    dict(initial=S2, taps=[-1]), dict(initial=H2, taps=[-1]),
    dict(initial=S3, taps=[-1]), dict(initial=H3, taps=[-1]),
]
# The f_ stands for forward_prop
f_states1,f_outputs1,f_states2,f_outputs2,f_states3,f_outputs3 = theano.scan(fn=rnn.set_hiddens,
                              outputs_info=outputs_info,
                              sequences=[X_LIST]
Beispiel #9
0
        self.memory_params += self.output_layer.memory_params

    def genHiddens(self,batch_size,layer):
        """Return a 2-tuple of distinct zero arrays for ``layer``.

        Each array is ``float32`` with shape ``(batch_size, layer.y)``.

        NOTE(review): presumably the initial state/output pair for ``layer``
        -- confirm against the caller.
        """
        zero_shape = (batch_size, layer.y)
        initial_a = np.zeros(zero_shape, dtype='float32')
        initial_b = np.zeros(zero_shape, dtype='float32')
        return initial_a, initial_b

    def calc_cost(self,X,Y,S1,H1,S2,H2,S3,H3):
        """Run one forward pass and return the cost with the new hiddens.

        ``X`` goes through ``input_layer`` and then through the three hidden
        layers in order. Each hidden layer receives the second value returned
        by the layer before it, together with its own ``S``/``H`` pair. The
        last hidden layer's second return value feeds ``output_layer``.

        Returns the tuple ``(cost, pred, S1, H1, S2, H2, S3, H3)``, where
        ``cost`` is the mean categorical cross-entropy of ``pred`` against
        ``Y`` and the ``S``/``H`` entries are the values returned by the
        hidden layers' ``forward_prop``.
        """
        embedded = self.input_layer.forward_prop(X)
        state_1, output_1 = self.hidden_layer_1.forward_prop(embedded, S1, H1)
        state_2, output_2 = self.hidden_layer_2.forward_prop(output_1, S2, H2)
        state_3, output_3 = self.hidden_layer_3.forward_prop(output_2, S3, H3)
        pred = self.output_layer.forward_prop(output_3)
        cross_entropy = T.nnet.categorical_crossentropy(pred, Y)
        cost = cross_entropy.mean()
        return cost, pred, state_1, output_1, state_2, output_2, state_3, output_3

# Try to obtain the network from utils.load_net; if that raises, build a
# fresh RNN instead.
try:
    rnn = utils.load_net('rap')
except Exception:
    # Not a bare ``except:``: KeyboardInterrupt and SystemExit must keep
    # propagating instead of being mistaken for "nothing to load".
    rnn = RNN(wh.vocab_size, embed_size, nodes, batch_size)
    print("created new network")

# Module-level aliases for the network's two parameter lists.
params = rnn.update_params
memory_params = rnn.memory_params

# Eight entries, in the same order as the values returned by rnn.calc_cost
# (cost, pred, S1, H1, S2, H2, S3, H3). The two ``None`` entries mark
# outputs that are not fed back; the S/H entries start from the given
# initial variable and tap only the previous step (taps=[-1]).
outputs_info = [
    None,
    None,
    dict(initial=S1, taps=[-1]),
    dict(initial=H1, taps=[-1]),
    dict(initial=S2, taps=[-1]),
    dict(initial=H2, taps=[-1]),
    dict(initial=S3, taps=[-1]),
    dict(initial=H3, taps=[-1]),
]

scan_costs,y_preds,states1,outputs1,states2,outputs2,states3,outputs3 = theano.scan(fn=rnn.calc_cost,
                              outputs_info=outputs_info,
                              sequences=[X_LIST,Y_LIST]