Example #1
 def ff(self, x, phase):
     self.dropmask = owl.randb(x.shape, self.keep_ratio)
     if phase == "TRAIN":
         return ele.mult(x, self.dropmask) * self.scale
     else:
         return x
Example #2
 def ff(self, x, phase):
     self.dropmask = owl.randb(x.shape, self.keep_ratio)
     if phase == "TRAIN":
         return ele.mult(x, self.dropmask) * self.scale
     else:
         #return x * (1 - self.params.dropout_param.dropout_ratio)
         return x
Example #3
 def ff(self, x, phase):
     ''' Forward function of dropout

     The dropout mask is not applied in ``"TEST"`` mode.
     '''
     if phase == "TRAIN":
         self.dropmask = owl.randb(x.shape, self.keep_ratio)
         return ele.mult(x, self.dropmask) * self.scale
     else:
         return x
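
The computation in these dropout layers is plain inverted dropout. For reference, a minimal NumPy sketch of the same forward pass; it assumes self.scale equals 1 / keep_ratio and that owl.randb draws a Bernoulli keep-mask with success probability keep_ratio, and the function name is illustrative rather than part of owl:

import numpy as np

def dropout_ff(x, keep_ratio, phase):
    # Bernoulli keep-mask, analogous to owl.randb(x.shape, keep_ratio)
    mask = (np.random.rand(*x.shape) < keep_ratio).astype(x.dtype)
    if phase == "TRAIN":
        # rescale by 1 / keep_ratio so the expected activation is unchanged
        return x * mask * (1.0 / keep_ratio)
    return x  # identity in "TEST" mode
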
Example #4
 def ff(self, x, phase):
     ''' Forward function of dropout

     The dropout mask is not applied in ``"TEST"`` mode.
     '''
     self.dropmask = owl.randb(x.shape, self.keep_ratio)
     if phase == "TRAIN":
         return ele.mult(x, self.dropmask) * self.scale
     else:
         return x
Example #5
    def getloss(self):
        #get accuracy
        '''
        batch_size = self.ff_y.shape[1]
        predict = self.ff_y.argmax(0)
        ground_truth = self.y.argmax(0)
        correct = (predict - ground_truth).count_zero()
        acc = 1 - (batch_size - correct) * 1.0 / batch_size
        print acc
        '''

        lossmat = ele.mult(ele.ln(self.ff_y), self.y)
        res = lossmat.sum(0).sum(1).to_numpy()
        return -res[0][0] / lossmat.shape[1]
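
Written out, getloss computes the average cross entropy of the minibatch. Assuming self.ff_y holds column-wise softmax outputs and self.y the matching one-hot targets (which is what the element-wise ele.mult against ele.ln implies), the returned value is

    L = -(1/B) * sum_j sum_i y[i][j] * ln(yhat[i][j]),   where B = lossmat.shape[1]

i.e. the summed log-likelihood of the correct classes, negated and averaged over the batch. Examples #18 and #20 below are the same function without the commented-out accuracy block.
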
Example #6
 def bp(self, y):
     return ele.mult(y, self.dropmask) * self.scale
Example #7
    def train_one_mb(self, data, label, dropout_rate):
        num_samples = data.shape[-1]
        num_layers = 12
        acts = [None] * num_layers
        sens = [None] * num_layers
        weightsgrad = [None] * self.num_weights
        biasgrad = [None] * self.num_weights

        # FF
        acts[0] = data
        acts[1] = ele.relu(self.convs[0].ff(acts[0], self.weights[0], self.bias[0])) # conv1
        acts[2] = self.poolings[0].ff(acts[1]) # pool1
        acts[3] = ele.relu(self.convs[1].ff(acts[2], self.weights[1], self.bias[1])) # conv2
        acts[4] = self.poolings[1].ff(acts[3]) # pool2
        acts[5] = ele.relu(self.convs[2].ff(acts[4], self.weights[2], self.bias[2])) # conv3
        acts[6] = ele.relu(self.convs[3].ff(acts[5], self.weights[3], self.bias[3])) # conv4
        acts[7] = ele.relu(self.convs[4].ff(acts[6], self.weights[4], self.bias[4])) # conv5
        acts[8] = self.poolings[2].ff(acts[7]) # pool5
        re_acts8 = acts[8].reshape([np.prod(acts[8].shape[0:3]), num_samples])
        acts[9] = ele.relu(self.weights[5] * re_acts8 + self.bias[5]) # fc6
        mask6 = owl.randb(acts[9].shape, dropout_rate)
        acts[9] = ele.mult(acts[9], mask6) # drop6
        acts[10] = ele.relu(self.weights[6] * acts[9] + self.bias[6]) # fc7
        mask7 = owl.randb(acts[10].shape, dropout_rate)
        acts[10] = ele.mult(acts[10], mask7) # drop7
        acts[11] = self.weights[7] * acts[10] + self.bias[7] # fc8

        out = co.softmax(acts[11], co.soft_op.instance) # prob

        sens[11] = out - label
        sens[10] = self.weights[7].trans() * sens[11] # fc8
        sens[10] = ele.mult(sens[10], mask7) # drop7
        sens[10] = ele.relu_back(sens[10], acts[10]) # relu7
        sens[9] = self.weights[6].trans() * sens[10]
        sens[9] = ele.mult(sens[9], mask6) # drop6
        sens[9] = ele.relu_back(sens[9], acts[9]) # relu6
        sens[8] = (self.weights[5].trans() * sens[9]).reshape(acts[8].shape) # fc6
        sens[7] = ele.relu_back(self.poolings[2].bp(sens[8], acts[8], acts[7]), acts[7]) # pool5, relu5
        sens[6] = ele.relu_back(self.convs[4].bp(sens[7], acts[6], self.weights[4]), acts[6]) # conv5, relu4
        sens[5] = ele.relu_back(self.convs[3].bp(sens[6], acts[5], self.weights[3]), acts[5]) # conv4, relu3
        sens[4] = self.convs[2].bp(sens[5], acts[4], self.weights[2]) # conv3
        sens[3] = ele.relu_back(self.poolings[1].bp(sens[4], acts[4], acts[3]), acts[3]) # pool2, relu2
        sens[2] = self.convs[1].bp(sens[3], acts[2], self.weights[1]) # conv2
        sens[1] = self.poolings[0].bp(sens[2], acts[2], acts[1]) # pool1
        sens[1] = ele.relu_back(sens[1], acts[1]) # relu1

        weightsgrad[7] = sens[11] * acts[10].trans()
        weightsgrad[6] = sens[10] * acts[9].trans()
        weightsgrad[5] = sens[9] * re_acts8.trans()
        weightsgrad[4] = self.convs[4].weight_grad(sens[7], acts[6], self.weights[4])
        weightsgrad[3] = self.convs[3].weight_grad(sens[6], acts[5], self.weights[3])
        weightsgrad[2] = self.convs[2].weight_grad(sens[5], acts[4], self.weights[2])
        weightsgrad[1] = self.convs[1].weight_grad(sens[3], acts[2], self.weights[1])
        weightsgrad[0] = self.convs[0].weight_grad(sens[1], acts[0], self.weights[0])
        biasgrad[7] = sens[11].sum(1)
        biasgrad[6] = sens[10].sum(1)
        biasgrad[5] = sens[9].sum(1)
        biasgrad[4] = self.convs[4].bias_grad(sens[7])
        biasgrad[3] = self.convs[3].bias_grad(sens[6])
        biasgrad[2] = self.convs[2].bias_grad(sens[5])
        biasgrad[1] = self.convs[1].bias_grad(sens[3])
        biasgrad[0] = self.convs[0].bias_grad(sens[1])
        return (out, weightsgrad, biasgrad)
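
train_one_mb only computes the gradients; applying them is left to the caller. Below is a minimal sketch of such a caller under plain SGD, with no momentum or weight decay; net, lr, and sgd_step are illustrative names, not part of this codebase:

def sgd_step(net, data, label, dropout_rate, lr):
    out, weightsgrad, biasgrad = net.train_one_mb(data, label, dropout_rate)
    num_samples = data.shape[-1]
    # normalize by the minibatch size, as train_network (Example #11) does via eps_w / num_samples
    for k in range(net.num_weights):
        net.weights[k] -= lr / num_samples * weightsgrad[k]
        net.bias[k] -= lr / num_samples * biasgrad[k]
    return out
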
Example #8
def LSTM_test(model, sents, words, tanhC_version=1):

    N = model.Layers[1]
    K = model.Layers[2]

    test_ll = 0
    # For each sentence
    for sent_id, sent in enumerate(sents):
        #print sent_id
        #print "sent", sent
        #print "sents", sents
        ##### Initialize activations #####

        Tau = len(sent)
        sent_ll = 0  # Sentence log likelihood

        data = [None] * Tau

        Hout = [None] * Tau
        Hout[0] = owl.zeros([N, 1])

        act_ig = [None] * Tau
        act_fg = [None] * Tau
        act_og = [None] * Tau
        act_ff = [None] * Tau

        C = [None] * Tau
        C[0] = owl.zeros([N, 1])

        ##### Forward pass #####
        # For each time step

        for t in range(1, Tau):
            # predict the (t+1)'th word from the t'th word
            data[t] = model.emb_weight[sent[t - 1]]

            act_ig[t] = (model.ig_weight_data * data[t] +
                         model.ig_weight_prev * Hout[t - 1] +
                         model.ig_weight_cell * C[t - 1] +
                         model.ig_weight_bias)
            act_ig[t] = ele.sigm(act_ig[t])

            act_fg[t] = (model.fg_weight_data * data[t] +
                         model.fg_weight_prev * Hout[t - 1] +
                         model.fg_weight_cell * C[t - 1] +
                         model.fg_weight_bias)
            act_fg[t] = ele.sigm(act_fg[t])

            act_ff[t] = (model.ff_weight_data * data[t] +
                         model.ff_weight_prev * Hout[t - 1] +
                         model.ff_weight_bias)
            act_ff[t] = ele.tanh(act_ff[t])

            C[t] = ele.mult(act_ig[t], act_ff[t]) + ele.mult(
                act_fg[t], C[t - 1])

            act_og[t] = (model.og_weight_data * data[t] +
                         model.og_weight_prev * Hout[t - 1] +
                         model.og_weight_cell * C[t] +
                         model.og_weight_bias)
            act_og[t] = ele.sigm(act_og[t])

            if tanhC_version:
                Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
            else:
                Hout[t] = ele.mult(act_og[t], C[t])

            Y = softmax(model.decoder_weights * Hout[t] + model.decoder_bias)

            # evaluation
            output = Y.to_numpy()  # Can directly get a single element from Y
            # print output[0, sent[t]]
            sent_ll += math.log(max(output[0, sent[t]], 1e-20), 2)

        test_ll += sent_ll

    test_ent = test_ll * (-1) / words
    test_ppl = 2**test_ent

    print "Test PPL =", test_ppl
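
Since sent_ll accumulates base-2 log probabilities, the last two lines compute standard per-word perplexity:

    PPL = 2 ** (-(1/W) * sum_t log2 p(w_t)),   W = words

Example #9 below computes the same quantity in base 10 (math.log10 paired with 10 ** test_ent).
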
Example #9
def LSTM_test(model, sents, vocab_size, words, tanhC_version=1):

    N = 10
    K = vocab_size

    test_ll = 0
    # For each sentence
    for sent_id, sent in enumerate(sents):
        #print "sent_id",sent_id
        #print "sent", sent
        #print "sents", sents
        ##### Initialize activations #####
        Tau = len(sent)
        sent_ll = 0  # Sentence log likelihood
        batch_size = Tau

        data = [None] * Tau
        prev = [None] * Tau
        embed = np.zeros((K, 1))
        embed[sent[0]] = 1
        data[0] = owl.from_numpy(embed).trans()

        Hout = [None] * Tau
        Hout[0] = owl.zeros([N, 1])

        act_ig = [None] * Tau
        act_fg = [None] * Tau
        act_og = [None] * Tau
        act_ff = [None] * Tau

        C = [None] * Tau
        C[0] = owl.zeros([N, 1])
        Ym = [None] * Tau

        ##### Forward pass #####
        # For each time step
        for t in range(1, Tau):
            prev[t] = Hout[t - 1]
            embed = np.zeros((K, 1))
            embed[sent[t]] = 1
            data[t] = owl.from_numpy(embed).trans()

            act_ig[t] = (model.ig_weight_data.trans() * data[t - 1] +
                         model.ig_weight_prev.trans() * prev[t] +
                         model.ig_weight_bias)
            act_fg[t] = (model.fg_weight_data.trans() * data[t - 1] +
                         model.fg_weight_prev.trans() * prev[t] +
                         model.fg_weight_bias)
            act_og[t] = (model.og_weight_data.trans() * data[t - 1] +
                         model.og_weight_prev.trans() * prev[t] +
                         model.og_weight_bias)
            act_ff[t] = (model.ff_weight_data.trans() * data[t - 1] +
                         model.ff_weight_prev.trans() * prev[t] +
                         model.ff_weight_bias)

            act_ig[t] = ele.sigm(act_ig[t])
            act_fg[t] = ele.sigm(act_fg[t])
            act_og[t] = ele.sigm(act_og[t])
            act_ff[t] = ele.tanh(act_ff[t])

            C[t] = ele.mult(act_ig[t], act_ff[t]) + ele.mult(
                act_fg[t], C[t - 1])

            if tanhC_version:
                Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
            else:
                Hout[t] = ele.mult(act_og[t], C[t])
            Ym[t] = softmax(model.decoder_weights.trans() * Hout[t] +
                            model.decoder_bias)

            #print "Y_0[t]",Y_o[t]
            #print "Y_o[t][sent[t]]",Y_o[t][sent[t]]
            output = Ym[t].trans() * data[t]
            test_ll += math.log10(max(np.sum(output.to_numpy()), 1e-20))

    print test_ll
    test_ent = test_ll * (-1) / words
    test_ppl = 10**test_ent

    print("Test PPL = %f" % (test_ppl))
Example #10
def LSTM_train(model, sents, words, learning_rate, EPOCH, tanhC_version = 1):

	# Constants
	N = model.Layers[1]       # Number of units
	K = model.Layers[2]       # Vocabulary size

	last_time = time.time()
	# For each epoch
	for epoch_id in range(1, EPOCH + 1):
		epoch_ll = 0
		# For each sentence
		for sent_id, sent in enumerate(sents):
			#print sent_id
			#print "sent", sent
			#print "sents", sents
			##### Initialize activations #####

			Tau = len(sent)
			sent_ll = 0 # Sentence log likelihood

			data = [None] * Tau

			Hout = [None] * Tau
			Hout[0] = owl.zeros([N, 1])

			act_ig = [None] * Tau
			act_fg = [None] * Tau
			act_og = [None] * Tau
			act_ff = [None] * Tau

			C = [None] * Tau
			C[0] = owl.zeros([N, 1])
			dY = [None] * Tau

			dBd = owl.zeros([model.Layers[2], 1]) #dY.sum(0)
			dWd = owl.zeros([model.Layers[2], model.Layers[1]]) 
			dHout = [None] * Tau #dY.dot(model.decoder_weights.transpose())
			dEmb = [None] * Tau

			##### Forward pass #####
			# For each time step

			for t in range(1, Tau):
				# predict the (t+1)'th word from the t'th word
				data[t] = model.emb_weight[sent[t - 1]]
				NVector = np.zeros((K, 1))
				NVector[sent[t]] = 1
				target = owl.from_numpy(NVector).trans()

				act_ig[t] = model.ig_weight_data * data[t] + model.ig_weight_prev * Hout[t - 1] + model.ig_weight_cell * C[t - 1] + model.ig_weight_bias
				act_ig[t] = ele.sigm(act_ig[t])

				act_fg[t] = model.fg_weight_data * data[t] + model.fg_weight_prev * Hout[t - 1] + model.fg_weight_cell * C[t - 1] + model.fg_weight_bias
				act_fg[t] = ele.sigm(act_fg[t])

				act_ff[t] = model.ff_weight_data * data[t] + model.ff_weight_prev * Hout[t - 1] + model.ff_weight_bias
				act_ff[t] = ele.tanh(act_ff[t])

				C[t] = ele.mult(act_ig[t], act_ff[t]) + ele.mult(act_fg[t], C[t - 1])

				act_og[t] = model.og_weight_data * data[t] + model.og_weight_prev * Hout[t - 1] + model.og_weight_cell * C[t] + model.og_weight_bias
				act_og[t] = ele.sigm(act_og[t])

				if tanhC_version:
					Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
				else:
					Hout[t] = ele.mult(act_og[t], C[t])

				Y = softmax(model.decoder_weights * Hout[t] + model.decoder_bias)

				# BP to Hout
				dY[t] = Y - target
				dBd += dY[t]
				dWd += dY[t] * Hout[t].trans()
				dHout[t] = model.decoder_weights.trans() * dY[t]

				# evaluation
				output = Y.to_numpy()			# Can directly get a single element from Y
				# print output[0, sent[t]]
				sent_ll += math.log(max(output[0, sent[t]],1e-20), 2)

				#print "Y_0[t]",Y_o[t]
				#print "Y_o[t][sent[t]]",Y_o[t][sent[t]]
				#print np.sum(output.to_numpy())
				# output = Ym[t].trans() * data[t]
				# sent_ll += math.log10( max(np.sum(output.to_numpy()),1e-20) )
			##### Initialize gradient vectors #####
				

			weight_update_ig_data = owl.zeros([model.Layers[1], model.Layers[0]])
			weight_update_ig_prev = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_ig_cell = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_ig_bias = owl.zeros([model.Layers[1], 1])

			weight_update_fg_data = owl.zeros([model.Layers[1], model.Layers[0]])
			weight_update_fg_prev = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_fg_cell = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_fg_bias = owl.zeros([model.Layers[1], 1])

			weight_update_og_data = owl.zeros([model.Layers[1], model.Layers[0]])
			weight_update_og_prev = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_og_cell = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_og_bias = owl.zeros([model.Layers[1], 1])

			weight_update_ff_data = owl.zeros([model.Layers[1], model.Layers[0]])
			weight_update_ff_prev = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_ff_bias = owl.zeros([model.Layers[1], 1])

			dC = [None] * Tau

			for t in xrange(Tau):
				dC[t] = owl.zeros(C[t].shape)

			# Calculate the error and add it
			for t in reversed(range(1, Tau)):
				#print "sent",sent
				#print "t",t

				# BP from og controled gate and og
				if tanhC_version:
					tanhC = ele.tanh(C[t])
					dTanhC = ele.mult(dHout[t], act_og[t])
					sen_og = ele.mult(dHout[t], tanhC)
					dC[t] += ele.mult((1 - ele.mult(tanhC, tanhC)), dTanhC)
				else:
					sen_og = ele.mult(C[t], dHout[t])
					dC[t] += ele.mult(act_og[t], dHout[t])

				# BP from og
				sen_og = ele.mult(ele.mult(act_og[t], (1.0 - act_og[t])), sen_og)
				dHout[t - 1] = model.og_weight_prev.trans() * sen_og
				dC[t] += model.og_weight_cell.trans() * sen_og
				dEmb[t] = model.og_weight_data.trans() * sen_og

				# BP from fg controled gate
				sen_fg = ele.mult(C[t - 1], dC[t])
				dC[t - 1] += ele.mult(act_fg[t], dC[t])
				
				# BP from ig controled gate
				sen_ig = ele.mult(act_ff[t], dC[t])
				sen_ff = ele.mult(act_ig[t], dC[t])
				sen_ff = ele.mult((1 - ele.mult(act_ff[t], act_ff[t])), sen_ff)
				dEmb[t] += model.ff_weight_data.trans() * sen_ff
				
				# BP from fg
				sen_fg = ele.mult(ele.mult(act_fg[t], (1.0 - act_fg[t])), sen_fg)
				dHout[t - 1] += model.fg_weight_prev.trans() * sen_fg
				dC[t - 1] += model.fg_weight_cell.trans() * sen_fg
				dEmb[t] += model.fg_weight_data.trans() * sen_fg

				# BP from ig
				sen_ig = ele.mult(ele.mult(act_ig[t], (1.0 - act_ig[t])), sen_ig)
				dHout[t - 1] += model.ig_weight_prev.trans() * sen_ig
				dC[t - 1] += model.ig_weight_cell.trans() * sen_ig
				dEmb[t] += model.ig_weight_data.trans() * sen_ig

				# derivatives on weight matrix and bias
				weight_update_ig_data += sen_ig * data[t].trans()
				weight_update_ig_prev += sen_ig * Hout[t - 1].trans()
				weight_update_ig_cell += sen_ig * C[t - 1].trans()
				weight_update_ig_bias += sen_ig

				weight_update_fg_data += sen_fg * data[t].trans()
				weight_update_fg_prev += sen_fg * Hout[t - 1].trans()
				weight_update_fg_cell += sen_fg * C[t - 1].trans()
				weight_update_fg_bias += sen_fg

				weight_update_og_data += sen_og * data[t].trans()
				weight_update_og_prev += sen_og * Hout[t - 1].trans()
				weight_update_og_cell += sen_og * C[t].trans()
				weight_update_og_bias += sen_og

				weight_update_ff_data += sen_ff * data[t].trans()
				weight_update_ff_prev += sen_ff * Hout[t - 1].trans()
				weight_update_ff_bias += sen_ff


			# normalize the gradients
			rate = learning_rate / Tau

			# weight update
			model.ig_weight_prev -= rate * weight_update_ig_prev
			model.ig_weight_data -= rate * weight_update_ig_data
			model.ig_weight_cell -= rate * weight_update_ig_cell
			model.ig_weight_bias -= rate * weight_update_ig_bias

			model.fg_weight_prev -= rate * weight_update_fg_prev
			model.fg_weight_data -= rate * weight_update_fg_data
			model.fg_weight_cell -= rate * weight_update_fg_cell
			model.fg_weight_bias -= rate * weight_update_fg_bias

			model.og_weight_prev -= rate * weight_update_og_prev
			model.og_weight_data -= rate * weight_update_og_data
			model.og_weight_cell -= rate * weight_update_og_cell
			model.og_weight_bias -= rate * weight_update_og_bias

			model.ff_weight_prev -= rate * weight_update_ff_prev
			model.ff_weight_data -= rate * weight_update_ff_data
			model.ff_weight_bias -= rate * weight_update_ff_bias

			model.decoder_weights -= rate * dWd
			model.decoder_bias -= rate * dBd

			for t in range(1, Tau):
				model.emb_weight[sent[t - 1]] -= rate * dEmb[t]

			# Print results
			epoch_ll += sent_ll
			# print(" Sentence %d LL: %f" % (sent_id, sent_ll))

			
		epoch_ent = epoch_ll * (-1) / words
		epoch_ppl = 2 ** epoch_ent
		cur_time = time.time()
		print("Epoch %d (alpha=%f) PPL=%f" % (epoch_id, learning_rate, epoch_ppl))
		print "  time consumed:", cur_time - last_time
		last_time = cur_time

	return model, learning_rate
Example #11
def train_network(model,
                  num_epochs=100,
                  minibatch_size=256,
                  dropout_rate=0.5,
                  eps_w=0.01,
                  eps_b=0.01,
                  mom=0.9,
                  wd=0.0005):
    gpu = owl.create_gpu_device(1)
    owl.set_device(gpu)
    num_layers = 20
    count = 0
    last = time.time()

    dp = ImageNetDataProvider(
        mean_file='/home/minjie/data/imagenet/imagenet_mean.binaryproto',
        train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
        val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
        test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')

    acts = [None] * num_layers
    sens = [None] * num_layers

    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        sys.stdout.flush()
        for (samples, labels) in dp.get_train_mb(minibatch_size):
            num_samples = samples.shape[0]

            acts = [None] * num_layers
            sens = [None] * num_layers

            # FF
            acts[0] = owl.from_nparray(samples).reshape(
                [227, 227, 3, num_samples])
            target = owl.from_nparray(labels)

            acts1 = conv_forward(acts[0], model.weights[0], model.bias[0],
                                 model.conv_infos[0])  # conv1
            acts[1] = ele.relu(acts1)  # relu1
            acts[2] = pooling_forward(acts[1], model.pooling_infos[0])  # pool1
            acts3 = conv_forward(acts[2], model.weights[1], model.bias[1],
                                 model.conv_infos[1])  # conv2
            acts[3] = ele.relu(acts3)  # relu2
            acts[4] = pooling_forward(acts[3], model.pooling_infos[1])  # pool2
            acts5 = conv_forward(acts[4], model.weights[2], model.bias[2],
                                 model.conv_infos[2])  # conv3
            acts[5] = ele.relu(acts5)  # relu3
            acts6 = conv_forward(acts[5], model.weights[3], model.bias[3],
                                 model.conv_infos[3])  # conv4
            acts[6] = ele.relu(acts6)  # relu4
            acts7 = conv_forward(acts[6], model.weights[4], model.bias[4],
                                 model.conv_infos[4])  # conv5
            acts[7] = ele.relu(acts7)  # relu5
            acts[8] = pooling_forward(acts[7], model.pooling_infos[2])  # pool5
            re_acts8 = acts[8].reshape(
                [np.prod(acts[8].shape[0:3]), num_samples])
            acts9 = model.weights[5] * re_acts8 + model.bias[5]  # fc6
            acts[9] = ele.relu(acts9)  # relu6
            mask6 = owl.randb(acts[9].shape, dropout_rate)
            acts[9] = ele.mult(acts[9], mask6)  # drop6
            acts10 = model.weights[6] * acts[9] + model.bias[6]  # fc7
            acts[10] = ele.relu(acts10)  # relu7
            mask7 = owl.randb(acts[10].shape, dropout_rate)
            acts[10] = ele.mult(acts[10], mask7)  # drop7
            acts[11] = model.weights[7] * acts[10] + model.bias[7]  # fc8
            acts[12] = softmax_forward(
                acts[11].reshape([1000, 1, 1, num_samples]),
                soft_op.instance).reshape([1000, num_samples])  # prob

            # error
            sens[11] = acts[12] - target

            # BP
            sens[10] = model.weights[7].trans() * sens[11]  # fc8
            sens[10] = ele.mult(sens[10], mask7)  # drop7
            sens[10] = ele.relu_back(sens[10], acts[10], acts10)  # relu7
            sens[9] = model.weights[6].trans() * sens[10]
            sens[9] = ele.mult(sens[9], mask6)  # drop6
            sens[9] = ele.relu_back(sens[9], acts[9], acts9)  # relu6
            sens[8] = (model.weights[5].trans() * sens[9]).reshape(
                acts[8].shape)  # fc6
            sens[7] = pooling_backward(sens[8], acts[8], acts[7],
                                       model.pooling_infos[2])  # pool5
            sens[7] = ele.relu_back(sens[7], acts[7], acts7)  # relu5
            sens[6] = conv_backward_data(sens[7], model.weights[4],
                                         model.conv_infos[4])  # conv5
            sens[6] = ele.relu_back(sens[6], acts[6], acts6)  # relu4
            sens[5] = conv_backward_data(sens[6], model.weights[3],
                                         model.conv_infos[3])  # conv4
            sens[5] = ele.relu_back(sens[5], acts[5], acts5)  # relu3
            sens[4] = conv_backward_data(sens[5], model.weights[2],
                                         model.conv_infos[2])  # conv3
            sens[3] = pooling_backward(sens[4], acts[4], acts[3],
                                       model.pooling_infos[1])  # pool2
            sens[3] = ele.relu_back(sens[3], acts[3], acts3)  # relu2
            sens[2] = conv_backward_data(sens[3], model.weights[1],
                                         model.conv_infos[1])  # conv2
            sens[1] = pooling_backward(sens[2], acts[2], acts[1],
                                       model.pooling_infos[0])  # pool1
            sens[1] = ele.relu_back(sens[1], acts[1], acts1)  # relu1

            model.weightsdelta[7] = mom * model.weightsdelta[7] - eps_w / num_samples * (
                sens[11] * acts[10].trans() + wd * model.weights[7])
            model.biasdelta[7] = mom * model.biasdelta[7] - eps_b / num_samples * sens[11].sum(1)

            model.weightsdelta[6] = mom * model.weightsdelta[6] - eps_w / num_samples * (
                sens[10] * acts[9].trans() + wd * model.weights[6])
            model.biasdelta[6] = mom * model.biasdelta[6] - eps_b / num_samples * sens[10].sum(1)

            model.weightsdelta[5] = mom * model.weightsdelta[5] - eps_w / num_samples * (
                sens[9] * re_acts8.trans() + wd * model.weights[5])
            model.biasdelta[5] = mom * model.biasdelta[5] - eps_b / num_samples * sens[9].sum(1)

            model.weightsdelta[4] = mom * model.weightsdelta[4] - eps_w / num_samples * (
                conv_backward_filter(sens[7], acts[6], model.conv_infos[4]) + wd * model.weights[4])
            model.biasdelta[4] = mom * model.biasdelta[4] - eps_b / num_samples * conv_backward_bias(sens[7])

            model.weightsdelta[3] = mom * model.weightsdelta[3] - eps_w / num_samples * (
                conv_backward_filter(sens[6], acts[5], model.conv_infos[3]) + wd * model.weights[3])
            model.biasdelta[3] = mom * model.biasdelta[3] - eps_b / num_samples * conv_backward_bias(sens[6])

            model.weightsdelta[2] = mom * model.weightsdelta[2] - eps_w / num_samples * (
                conv_backward_filter(sens[5], acts[4], model.conv_infos[2]) + wd * model.weights[2])
            model.biasdelta[2] = mom * model.biasdelta[2] - eps_b / num_samples * conv_backward_bias(sens[5])

            model.weightsdelta[1] = mom * model.weightsdelta[1] - eps_w / num_samples * (
                conv_backward_filter(sens[3], acts[2], model.conv_infos[1]) + wd * model.weights[1])
            model.biasdelta[1] = mom * model.biasdelta[1] - eps_b / num_samples * conv_backward_bias(sens[3])

            model.weightsdelta[0] = mom * model.weightsdelta[0] - eps_w / num_samples * (
                conv_backward_filter(sens[1], acts[0], model.conv_infos[0]) + wd * model.weights[0])
            model.biasdelta[0] = mom * model.biasdelta[0] - eps_b / num_samples * conv_backward_bias(sens[1])

            for k in range(8):
                model.weights[k] += model.weightsdelta[k]
                model.bias[k] += model.biasdelta[k]

            count = count + 1
            if count % 10 == 0:
                print_training_accuracy(acts[12], target, num_samples)
                print "time: %s" % (time.time() - last)
                last = time.time()
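
The weightsdelta / biasdelta recurrences above implement momentum SGD with L2 weight decay. For each weight matrix w with minibatch gradient g:

    delta = mom * delta - (eps_w / n) * (g + wd * w);   w = w + delta

where n = num_samples. The bias updates use eps_b and, in this variant, omit the decay term (the variant in Example #22 decays the biases as well).
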
Example #12
            z_h = -(reconstruction * weights + bias_h)
            hiddens = 1.0 / (1 + owl.NArray.exp(z_h))
    
            #Get negative Phase
            #print "- phase"
            d_weights -= reconstruction.trans() * hiddens
            d_bias_v -= reconstruction.sum(0)
            d_bias_h -= hiddens.sum(0)
    
            # update weights
            #print "update"
            weights += epsilon/batch_size * d_weights
            bias_v += epsilon/batch_size * d_bias_v
            bias_h += epsilon/batch_size * d_bias_h
    
            #Compute errors
            #print "compute errors"
            errs = (reconstruction - training_set)
            errs = el.mult(errs, errs)

            #owl.set_device(cpu)
            tmp = errs.sum(0)
            tmp2 = tmp.sum(0)
            # normalize by the number of entries, as Example #19 does with reduce(mul, ...)
            err.append(tmp2.to_numpy() / reduce(mul, errs.shape))
            owl.wait_for_all()
            #owl.set_device(dev)
    
        print("Mean squared error: %f" % np.mean(err))
        print("Time: %f" % (time.time() - start_time))
    print "Termination"
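
For reference: hiddens at the top of this fragment is the standard logistic RBM hidden activation, h = 1 / (1 + exp(-(v * W + b_h))) (the code builds the negated pre-activation z_h and feeds it to exp, which is the same thing), and the value appended to err is the mean squared reconstruction error, mean((v_recon - v) ** 2), taken over every entry of the batch. Example #19 below computes the same quantity with an explicit reduce(mul, ...) normalizer.
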
Example #13
 def bp(self, y, phase):
     if phase == "TRAIN":
         return ele.mult(y, self.dropmask) * self.scale
     else:
         return y
Example #14
def LSTM_train(model, sents, words, learning_rate, EPOCH, tanhC_version=1):

    # Constants
    N = model.Layers[1]  # Number of units
    K = model.Layers[0]  # Vocabulary size

    # For each epoch
    last_ll = 1e99
    for epoch_id in range(EPOCH, EPOCH + 10):
        print 'Start epoch #', epoch_id
        last_time = time.time()
        epoch_ll = 0
        tau_sum = 0
        # For each sentence
        for sent_id, sent in enumerate(sents):
            #print "sent_id",sent_id
            #print "sent", sent
            #print "sents", sents
            ##### Initialize activations #####
            Tau = len(sent)
            tau_sum += Tau
            sent_ll = 0  # Sentence log likelihood
            batch_size = Tau

            data = [None] * Tau
            prev = [None] * Tau
            data[0] = owl.zeros([K, 1])
            # embed = np.zeros((K, 1))
            # embed[sent[0]] = 1
            # data[0] = owl.from_numpy(embed).trans()

            Hout = [None] * Tau
            Hout[0] = owl.zeros([N, 1])

            act_ig = [None] * Tau
            act_fg = [None] * Tau
            act_og = [None] * Tau
            act_ff = [None] * Tau

            C = [None] * Tau
            C[0] = owl.zeros([N, 1])
            Ym = [None] * Tau
            dY = [None] * Tau

            dBd = owl.zeros([model.Layers[2], 1])  #dY.sum(0)
            dWd = owl.zeros([model.Layers[1],
                             model.Layers[2]])  #Hout.transpose().dot(dY)
            dHout = [None] * Tau  #dY.dot(model.decoder_weights.transpose())

            ##### Forward pass #####
            # For each time step
            for t in range(1, Tau):
                #prev[t] = Hout[t - 1]
                prev[t] = owl.zeros([N, 1])
                data[t] = owl.zeros([K, 1])
                #embed = np.zeros((K, 1))
                #embed[sent[t]] = 1
                #data[t] = owl.from_numpy(embed).trans()

                act_ig[t] = (model.ig_weight_data.trans() * data[t - 1] +
                             model.ig_weight_prev.trans() * prev[t] +
                             model.ig_weight_bias)
                act_fg[t] = (model.fg_weight_data.trans() * data[t - 1] +
                             model.fg_weight_prev.trans() * prev[t] +
                             model.fg_weight_bias)
                act_og[t] = (model.og_weight_data.trans() * data[t - 1] +
                             model.og_weight_prev.trans() * prev[t] +
                             model.og_weight_bias)
                act_ff[t] = (model.ff_weight_data.trans() * data[t - 1] +
                             model.ff_weight_prev.trans() * prev[t] +
                             model.ff_weight_bias)

                act_ig[t] = ele.sigm(act_ig[t])
                act_fg[t] = ele.sigm(act_fg[t])
                act_og[t] = ele.sigm(act_og[t])
                act_ff[t] = ele.tanh(act_ff[t])

                C[t] = ele.mult(act_ig[t], act_ff[t]) + ele.mult(
                    act_fg[t], C[t - 1])

                if tanhC_version:
                    Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
                else:
                    Hout[t] = ele.mult(act_og[t], C[t])
                Ym[t] = softmax(model.decoder_weights.trans() * Hout[t] +
                                model.decoder_bias)

                dY[t] = data[t] - Ym[t]
                dBd += dY[t] / batch_size
                dWd += Hout[t] * dY[t].trans() / batch_size
                dHout[t] = model.decoder_weights * dY[t]

            #print "Y_0[t]",Y_o[t]
            #print "Y_o[t][sent[t]]",Y_o[t][sent[t]]
            #print np.sum(output.to_numpy())
            # output = Ym[t].trans() * data[t]
            # sent_ll += math.log10( max(np.sum(output.to_numpy()),1e-20) )
            ##### Initialize gradient vectors #####
            #Ym[-1].wait_for_eval()
            for t in range(1, Tau):
                Ym[t].wait_for_eval()
            #output = Ym[t].trans() * data[t]
            #sent_ll += math.log10( max(np.sum(output.to_numpy()),1e-20) )
            if sent_id % 100 == 0:
                cur_time = time.time()
                print 'Finished', sent_id, 'sentences. Time used:', \
                    cur_time - last_time, 's. sent/s:', \
                    float(sent_id) / (cur_time - last_time), 'tau_sum=', tau_sum
                #print owl.print_profiler_result()
                tau_sum = 0
            continue  # NOTE: skips the backward pass below; this variant only times the forward pass

            sen_ig = [None] * Tau
            sen_fg = [None] * Tau
            sen_og = [None] * Tau
            sen_ff = [None] * Tau

            weight_update_ig_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_ig_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_ig_bias = owl.zeros([model.Layers[1], 1])

            weight_update_fg_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_fg_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_fg_bias = owl.zeros([model.Layers[1], 1])

            weight_update_og_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_og_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_og_bias = owl.zeros([model.Layers[1], 1])

            weight_update_ff_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_ff_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_ff_bias = owl.zeros([model.Layers[1], 1])

            dHin = owl.zeros([model.Layers[1], model.Layers[1]])
            dC = [None] * Tau
            for t in xrange(Tau):
                dC[t] = owl.zeros(C[t].shape)

            # Calculate the error and add it
            for t in reversed(range(1, len(sent))):
                #print "sent",sent
                #print "t",t
                if tanhC_version:
                    tanhCt = ele.tanh(C[t])
                    sen_og[t] = ele.mult(tanhCt, dHout[t])
                    dC[t] += ele.mult((1 - ele.mult(tanhCt, tanhCt)),
                                      ele.mult(act_og[t], dHout[t]))
                else:
                    sen_og[t] = ele.mult(C[t], dHout[t])
                    dC[t] += ele.mult(act_og[t], dHout[t])

                sen_fg[t] = owl.zeros([model.Layers[1], 1])
                if t > 0:
                    sen_fg[t] = ele.mult(C[t - 1], dC[t])
                    dC[t - 1] += ele.mult(act_fg[t], dC[t])
                sen_ig[t] = ele.mult(act_ff[t], dC[t])
                sen_ff[t] = ele.mult(act_ig[t], dC[t])

                # backprop activation functions
                sen_ff[t] = ele.mult((1 - ele.mult(act_ff[t], act_ff[t])),
                                     sen_ff[t])
                sen_ig[t] = ele.mult(ele.mult(act_ig[t], (1.0 - act_ig[t])),
                                     sen_ig[t])
                sen_fg[t] = ele.mult(ele.mult(act_fg[t], (1.0 - act_fg[t])),
                                     sen_fg[t])
                sen_og[t] = ele.mult(ele.mult(act_og[t], (1.0 - act_og[t])),
                                     sen_og[t])

                # backprop matrix multiply
                weight_update_ig_data += data[t] * sen_ig[t].trans()
                weight_update_ig_prev += prev[t] * sen_ig[t].trans()
                weight_update_ig_bias += sen_ig[t]

                weight_update_fg_data += data[t] * sen_fg[t].trans()
                weight_update_fg_prev += prev[t] * sen_fg[t].trans()
                weight_update_fg_bias += sen_fg[t]

                weight_update_og_data += data[t] * sen_og[t].trans()
                weight_update_og_prev += prev[t] * sen_og[t].trans()
                weight_update_og_bias += sen_og[t]

                weight_update_ff_data += data[t] * sen_ff[t].trans()
                weight_update_ff_prev += prev[t] * sen_ff[t].trans()
                weight_update_ff_bias += sen_ff[t]

                if t > 1:
                    dHout[t - 1] += model.ig_weight_prev.trans() * sen_ig[t]
                    dHout[t - 1] += model.fg_weight_prev.trans() * sen_fg[t]
                    dHout[t - 1] += model.og_weight_prev.trans() * sen_og[t]
                    dHout[t - 1] += model.ff_weight_prev.trans() * sen_ff[t]

            # normalize the gradients
            # weight update
            model.ig_weight_prev += learning_rate / batch_size * weight_update_ig_prev
            model.ig_weight_data += learning_rate / batch_size * weight_update_ig_data
            model.ig_weight_bias += learning_rate / batch_size * weight_update_ig_bias

            model.fg_weight_prev += learning_rate / batch_size * weight_update_fg_prev
            model.fg_weight_data += learning_rate / batch_size * weight_update_fg_data
            model.fg_weight_bias += learning_rate / batch_size * weight_update_fg_bias

            model.og_weight_prev += learning_rate / batch_size * weight_update_og_prev
            model.og_weight_data += learning_rate / batch_size * weight_update_og_data
            model.og_weight_bias += learning_rate / batch_size * weight_update_og_bias

            model.ff_weight_prev += learning_rate / batch_size * weight_update_ff_prev
            model.ff_weight_data += learning_rate / batch_size * weight_update_ff_data
            model.ff_weight_bias += learning_rate / batch_size * weight_update_ff_bias

            model.decoder_weights += learning_rate * dWd
            model.decoder_bias += learning_rate * dBd

            # Print results
            epoch_ll += sent_ll
            # print(" Sentence %d LL: %f" % (sent_id, sent_ll))
        epoch_ent = epoch_ll * (-1) / words
        epoch_ppl = 10**epoch_ent
        cur_time = time.time()
        print("Epoch %d (alpha=%f) PPL=%f" %
              (epoch_id, learning_rate, epoch_ppl))
        print "  time consumed:", cur_time - last_time
        last_time = cur_time
        if last_ll > epoch_ll:
            learning_rate /= 2.0
        last_ll = epoch_ll

    return model, learning_rate
Example #15
 def bp(self, y):
     return ele.mult(y, self.dropmask)
Example #16
 def ff(self, x):
     self.dropmask = owl.randb(x.shape, self.params.dropout_param.dropout_ratio)
     return ele.mult(x, self.dropmask)
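
Unlike the phase-aware variants above, this ff always applies the mask and never rescales. For a Bernoulli mask with keep probability p, E[mask * x] = p * x, which is why those variants multiply by a scale factor (presumably 1 / keep_ratio) during training to keep the expected activation magnitude unchanged between training and test.
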
Example #17
    def train_one_mb(self, data, label, dropout_rate):
        num_samples = data.shape[-1]
        num_layers = 12
        acts = [None] * num_layers
        sens = [None] * num_layers
        weightsgrad = [None] * self.num_weights
        biasgrad = [None] * self.num_weights

        # FF
        acts[0] = data
        acts[1] = ele.relu(self.convs[0].ff(acts[0], self.weights[0],
                                            self.bias[0]))  # conv1
        acts[2] = self.poolings[0].ff(acts[1])  # pool1
        acts[3] = ele.relu(self.convs[1].ff(acts[2], self.weights[1],
                                            self.bias[1]))  # conv2
        acts[4] = self.poolings[1].ff(acts[3])  # pool2
        acts[5] = ele.relu(self.convs[2].ff(acts[4], self.weights[2],
                                            self.bias[2]))  # conv3
        acts[6] = ele.relu(self.convs[3].ff(acts[5], self.weights[3],
                                            self.bias[3]))  # conv4
        acts[7] = ele.relu(self.convs[4].ff(acts[6], self.weights[4],
                                            self.bias[4]))  # conv5
        acts[8] = self.poolings[2].ff(acts[7])  # pool5
        re_acts8 = acts[8].reshape([np.prod(acts[8].shape[0:3]), num_samples])
        acts[9] = ele.relu(self.weights[5] * re_acts8 + self.bias[5])  # fc6
        mask6 = owl.randb(acts[9].shape, dropout_rate)
        acts[9] = ele.mult(acts[9], mask6)  # drop6
        acts[10] = ele.relu(self.weights[6] * acts[9] + self.bias[6])  # fc7
        mask7 = owl.randb(acts[10].shape, dropout_rate)
        acts[10] = ele.mult(acts[10], mask7)  # drop7
        acts[11] = self.weights[7] * acts[10] + self.bias[7]  # fc8

        out = co.softmax(acts[11], co.soft_op.instance)  # prob

        sens[11] = out - label
        sens[10] = self.weights[7].trans() * sens[11]  # fc8
        sens[10] = ele.mult(sens[10], mask7)  # drop7
        sens[10] = ele.relu_back(sens[10], acts[10])  # relu7
        sens[9] = self.weights[6].trans() * sens[10]
        sens[9] = ele.mult(sens[9], mask6)  # drop6
        sens[9] = ele.relu_back(sens[9], acts[9])  # relu6
        sens[8] = (self.weights[5].trans() * sens[9]).reshape(
            acts[8].shape)  # fc6
        sens[7] = ele.relu_back(self.poolings[2].bp(sens[8], acts[8], acts[7]),
                                acts[7])  # pool5, relu5
        sens[6] = ele.relu_back(self.convs[4].bp(sens[7], self.weights[4]),
                                acts[6])  # conv5, relu4
        sens[5] = ele.relu_back(self.convs[3].bp(sens[6], self.weights[3]),
                                acts[5])  # conv4, relu3
        sens[4] = self.convs[2].bp(sens[5], self.weights[2])  # conv3
        sens[3] = ele.relu_back(self.poolings[1].bp(sens[4], acts[4], acts[3]),
                                acts[3])  # pool2, relu2
        sens[2] = self.convs[1].bp(sens[3], self.weights[1])  # conv2
        sens[1] = self.poolings[0].bp(sens[2], acts[2], acts[1])  # pool1
        sens[1] = ele.relu_back(sens[1], acts[1])  # relu1

        weightsgrad[7] = sens[11] * acts[10].trans()
        weightsgrad[6] = sens[10] * acts[9].trans()
        weightsgrad[5] = sens[9] * re_acts8.trans()
        weightsgrad[4] = self.convs[4].weight_grad(sens[7], acts[6])
        weightsgrad[3] = self.convs[3].weight_grad(sens[6], acts[5])
        weightsgrad[2] = self.convs[2].weight_grad(sens[5], acts[4])
        weightsgrad[1] = self.convs[1].weight_grad(sens[3], acts[2])
        weightsgrad[0] = self.convs[0].weight_grad(sens[1], acts[0])
        biasgrad[7] = sens[11].sum(1)
        biasgrad[6] = sens[10].sum(1)
        biasgrad[5] = sens[9].sum(1)
        biasgrad[4] = self.convs[4].bias_grad(sens[7])
        biasgrad[3] = self.convs[3].bias_grad(sens[6])
        biasgrad[2] = self.convs[2].bias_grad(sens[5])
        biasgrad[1] = self.convs[1].bias_grad(sens[3])
        biasgrad[0] = self.convs[0].bias_grad(sens[1])
        return (out, weightsgrad, biasgrad)
Example #18
 def getloss(self):
     lossmat = ele.mult(ele.ln(self.ff_y), self.y)
     res = lossmat.sum(0).sum(1).to_numpy()
     return -res[0][0] / lossmat.shape[1]
Example #19
         #print "- phase"
         d_weights -= reconstruction.trans() * hiddens
         d_bias_v -= reconstruction.sum(0)
         d_bias_h -= hiddens.sum(0)
 
         # update weights
         #print "update"
         weights += epsilon/batch_size * d_weights
         bias_v += epsilon/batch_size * d_bias_v
         bias_h += epsilon/batch_size * d_bias_h
 
         #Compute errors
         #print "compute errors"
         owl.wait_for_all()
         diff = reconstruction - training_set
         sqrdiff = el.mult(diff, diff)
         total = sqrdiff.sum([0, 1]).to_numpy()[0, 0]
         mean = total / reduce(mul, sqrdiff.shape)
         err.append(mean)
         #owl.set_device(dev)
 
     print("Mean squared error: %f" % np.mean(err))
     print("Time: %f" % (time.time() - start_time))
     plt.hist((weights - weights_old).to_numpy().flatten(),10)
     plt.show()
     
     im = np.zeros([28,28*num_hid])
     for h in range(num_hid):
         im[:,h*28:(h+1)*28] = weights.to_numpy()[h,:].reshape([28,28])
     plt.hist(weights.to_numpy().flatten(),10)
     plt.show()
Example #20
 def getloss(self):
     ''' Get the loss of the softmax (cross entropy)
     '''
     lossmat = ele.mult(ele.ln(self.ff_y), self.y)
     res = lossmat.sum(0).sum(1).to_numpy()
     return -res[0][0] / lossmat.shape[1]
Example #21
def LSTM_test(model, sents, words, tanhC_version = 1):

	N = model.Layers[1]
	K = model.Layers[2]

	test_ll = 0
	# For each sentence
	for sent_id, sent in enumerate(sents):
		#print sent_id
		#print "sent", sent
		#print "sents", sents
		##### Initialize activations #####

		Tau = len(sent)
		sent_ll = 0 # Sentence log likelihood

		data = [None] * Tau

		Hout = [None] * Tau
		Hout[0] = owl.zeros([N, 1])

		act_ig = [None] * Tau
		act_fg = [None] * Tau
		act_og = [None] * Tau
		act_ff = [None] * Tau

		C = [None] * Tau
		C[0] = owl.zeros([N, 1])

		##### Forward pass #####
		# For each time step

		for t in range(1, Tau):
			# predict the (t+1)'th word from the t'th word
			data[t] = model.emb_weight[sent[t - 1]]

			act_ig[t] = model.ig_weight_data * data[t] + model.ig_weight_prev * Hout[t - 1] + model.ig_weight_cell * C[t - 1] + model.ig_weight_bias
			act_ig[t] = ele.sigm(act_ig[t])

			act_fg[t] = model.fg_weight_data * data[t] + model.fg_weight_prev * Hout[t - 1] + model.fg_weight_cell * C[t - 1] + model.fg_weight_bias
			act_fg[t] = ele.sigm(act_fg[t])

			act_ff[t] = model.ff_weight_data * data[t] + model.ff_weight_prev * Hout[t - 1] + model.ff_weight_bias
			act_ff[t] = ele.tanh(act_ff[t])

			C[t] = ele.mult(act_ig[t], act_ff[t]) + ele.mult(act_fg[t], C[t - 1])

			act_og[t] = model.og_weight_data * data[t] + model.og_weight_prev * Hout[t - 1] + model.og_weight_cell * C[t] + model.og_weight_bias
			act_og[t] = ele.sigm(act_og[t])

			if tanhC_version:
				Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
			else:
				Hout[t] = ele.mult(act_og[t], C[t])

			Y = softmax(model.decoder_weights * Hout[t] + model.decoder_bias)

			# evaluation
			output = Y.to_numpy()			# Can directly get a single element from Y
			# print output[0, sent[t]]
			sent_ll += math.log(max(output[0, sent[t]],1e-20), 2)

		test_ll += sent_ll

	test_ent = test_ll * (-1) / words
	test_ppl = 2 ** test_ent

	print "Test PPL =", test_ppl
Example #22
def train_network(model, num_epochs = 100, minibatch_size=256,
        dropout_rate = 0.5, eps_w = 0.01, eps_b = 0.01, mom = 0.9, wd = 0.0005):
    gpu = owl.create_gpu_device(1)
    owl.set_device(gpu)
    num_layers = 20
    count = 0
    last = time.time()

    dp = ImageNetDataProvider(mean_file='/home/minjie/data/imagenet/imagenet_mean.binaryproto',
            train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
            val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
            test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')

    acts = [None] * num_layers
    sens = [None] * num_layers

    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        sys.stdout.flush()
        for (samples, labels) in dp.get_train_mb(minibatch_size):
            num_samples = samples.shape[0]

            acts = [None] * num_layers
            sens = [None] * num_layers

            '''
            thisimg = samples[0, :]
            print thisimg
            imgdata = np.transpose(thisimg.reshape([3, 227*227])).reshape([227, 227, 3])
            print imgdata
            img = Image.fromarray(imgdata.astype(np.uint8))
            img.save('testimg.jpg', format='JPEG')
            exit(0)
            '''

            # FF
            acts[0] = owl.from_nparray(samples).reshape([227, 227, 3, num_samples])
            #print np.array(acts[0].tolist())[0:227*227*3]

            target = owl.from_nparray(labels)

            #np.set_printoptions(linewidth=200)
            #print acts[0].shape, model.weights[0].shape, model.bias[0].shape
            #im = np.array(acts[0].tolist()).reshape([num_samples, 227, 227, 3])
            #print im[0,:,:,0]
            #print im[0,:,:,1]
            #print im[0,:,:,2]
            #print target.max_index(0).tolist()[0:20]
            #sys.exit()

            acts1 = conv_forward(acts[0], model.weights[0], model.bias[0], model.conv_infos[0])
            acts[1] = ele.relu(acts1)#(conv_forward(acts[0], model.weights[0], model.bias[0], model.conv_infos[0])) # conv1
            acts[2] = pooling_forward(acts[1], model.pooling_infos[0]) # pool1
            acts3 = conv_forward(acts[2], model.weights[1], model.bias[1], model.conv_infos[1]) # conv2
            acts[3] = ele.relu(acts3)#(conv_forward(acts[2], model.weights[1], model.bias[1], model.conv_infos[1])) # conv2
            acts[4] = pooling_forward(acts[3], model.pooling_infos[1]) # pool2
            acts5 = conv_forward(acts[4], model.weights[2], model.bias[2], model.conv_infos[2]) # conv3
            acts[5] = ele.relu(acts5)#(conv_forward(acts[4], model.weights[2], model.bias[2], model.conv_infos[2])) # conv3
            acts6 = conv_forward(acts[5], model.weights[3], model.bias[3], model.conv_infos[3]) # conv4
            acts[6] = ele.relu(acts6)#(conv_forward(acts[5], model.weights[3], model.bias[3], model.conv_infos[3])) # conv4
            acts7 = conv_forward(acts[6], model.weights[4], model.bias[4], model.conv_infos[4]) # conv5
            acts[7] = ele.relu(acts7)#(conv_forward(acts[6], model.weights[4], model.bias[4], model.conv_infos[4])) # conv5
            acts[8] = pooling_forward(acts[7], model.pooling_infos[2]) # pool5
            re_acts8 = acts[8].reshape([np.prod(acts[8].shape[0:3]), num_samples])
            acts9 = model.weights[5] * re_acts8 + model.bias[5] # fc6
            acts[9] = ele.relu(acts9)#(model.weights[5] * re_acts8 + model.bias[5]) # fc6
            mask6 = owl.randb(acts[9].shape, dropout_rate)
            acts[9] = ele.mult(acts[9], mask6) # drop6
            acts10 = model.weights[6] * acts[9] + model.bias[6] # fc7
            acts[10] = ele.relu(acts10)#(model.weights[6] * acts[9] + model.bias[6]) # fc7
            mask7 = owl.randb(acts[10].shape, dropout_rate)
            acts[10] = ele.mult(acts[10], mask7) # drop7
            acts[11] = model.weights[7] * acts[10] + model.bias[7] # fc8
            acts[12] = softmax_forward(acts[11].reshape([1000, 1, 1, num_samples]), soft_op.instance).reshape([1000, num_samples]) # prob

            # error
            sens[11] = acts[12] - target

            # BP
            sens[10] = model.weights[7].trans() * sens[11] # fc8
            sens[10] = ele.mult(sens[10], mask7) # drop7
            sens[10] = ele.relu_back(sens[10], acts[10], acts10) # relu7
            sens[9] = model.weights[6].trans() * sens[10]
            sens[9] = ele.mult(sens[9], mask6) # drop6
            sens[9] = ele.relu_back(sens[9], acts[9], acts9) # relu6
            sens[8] = (model.weights[5].trans() * sens[9]).reshape(acts[8].shape) # fc6
            sens[7] = pooling_backward(sens[8], acts[8], acts[7], model.pooling_infos[2]) # pool5
            sens[7] = ele.relu_back(sens[7], acts[7], acts7) # relu5
            sens[6] = conv_backward_data(sens[7], model.weights[4], model.conv_infos[4]) # conv5
            sens[6] = ele.relu_back(sens[6], acts[6], acts6) # relu4
            sens[5] = conv_backward_data(sens[6], model.weights[3], model.conv_infos[3]) # conv4
            sens[5] = ele.relu_back(sens[5], acts[5], acts5) # relu3
            sens[4] = conv_backward_data(sens[5], model.weights[2], model.conv_infos[2]) # conv3
            sens[3] = pooling_backward(sens[4], acts[4], acts[3], model.pooling_infos[1]) # pool2
            sens[3] = ele.relu_back(sens[3], acts[3], acts3) # relu2
            sens[2] = conv_backward_data(sens[3], model.weights[1], model.conv_infos[1]) # conv2
            sens[1] = pooling_backward(sens[2], acts[2], acts[1], model.pooling_infos[0]) # pool1
            sens[1] = ele.relu_back(sens[1], acts[1], acts1) # relu1

            model.weightsdelta[7] = mom * model.weightsdelta[7] - eps_w / num_samples * (sens[11] * acts[10].trans() + wd * model.weights[7])
            model.biasdelta[7] = mom * model.biasdelta[7] - eps_b / num_samples * (sens[11].sum(1) + wd * model.bias[7])

            model.weightsdelta[6] = mom * model.weightsdelta[6] - eps_w / num_samples * (sens[10] * acts[9].trans() + wd * model.weights[6])
            model.biasdelta[6] = mom * model.biasdelta[6] - eps_b / num_samples * (sens[10].sum(1) + wd * model.bias[6])

            model.weightsdelta[5] = mom * model.weightsdelta[5] - eps_w / num_samples * (sens[9] * re_acts8.trans() + wd * model.weights[5])
            model.biasdelta[5] = mom * model.biasdelta[5] - eps_b / num_samples * (sens[9].sum(1) + wd * model.bias[5])

            model.weightsdelta[4] = mom * model.weightsdelta[4] - eps_w / num_samples * (conv_backward_filter(sens[7], acts[6], model.conv_infos[4]) + wd * model.weights[4])
            model.biasdelta[4] = mom * model.biasdelta[4] - eps_b / num_samples * (conv_backward_bias(sens[7]) + wd * model.bias[4])

            model.weightsdelta[3] = mom * model.weightsdelta[3] - eps_w / num_samples * (conv_backward_filter(sens[6], acts[5], model.conv_infos[3]) + wd * model.weights[3])
            model.biasdelta[3] = mom * model.biasdelta[3] - eps_b / num_samples * (conv_backward_bias(sens[6]) + wd * model.bias[3])

            model.weightsdelta[2] = mom * model.weightsdelta[2] - eps_w / num_samples * (conv_backward_filter(sens[5], acts[4], model.conv_infos[2]) + wd * model.weights[2])
            model.biasdelta[2] = mom * model.biasdelta[2] - eps_b / num_samples * (conv_backward_bias(sens[5]) + wd * model.bias[2])

            model.weightsdelta[1] = mom * model.weightsdelta[1] - eps_w / num_samples * (conv_backward_filter(sens[3], acts[2], model.conv_infos[1]) + wd * model.weights[1])
            model.biasdelta[1] = mom * model.biasdelta[1] - eps_b / num_samples * (conv_backward_bias(sens[3]) + wd * model.bias[1])

            model.weightsdelta[0] = mom * model.weightsdelta[0] - eps_w / num_samples * (conv_backward_filter(sens[1], acts[0], model.conv_infos[0]) + wd * model.weights[0])
            model.biasdelta[0] = mom * model.biasdelta[0] - eps_b / num_samples * (conv_backward_bias(sens[1]) + wd * model.bias[0])

            for k in range(8):
                model.weights[k] += model.weightsdelta[k]
                model.bias[k] += model.biasdelta[k]

            count = count + 1
            #if count % 2 == 0:
                #acts[18].start_eval()
            if count % 10 == 0:
                print_training_accuracy(acts[12], target, num_samples)
                print "time: %s" % (time.time() - last)
                last = time.time()
Example #23
def LSTM_train(model,
               sents,
               vocab_size,
               words,
               NUM_EPOCHS=100,
               tanhC_version=1):

    # Constants
    ALPHA = 1  # Learning rate
    N = 10  # Number of units
    learning_rate = 1

    K = vocab_size  # Vocabulary size

    # For each epoch
    last_ll = 1e99
    last_time = time.time()
    for epoch_id in range(1, NUM_EPOCHS + 1):
        epoch_ll = 0
        # For each sentence
        for sent_id, sent in enumerate(sents):
            #print "sent_id",sent_id
            #print "sent", sent
            #print "sents", sents
            ##### Initialize activations #####
            Tau = len(sent)
            sent_ll = 0  # Sentence log likelihood
            batch_size = Tau

            data = [None] * Tau
            prev = [None] * Tau
            embed = np.zeros((K, 1))
            embed[sent[0]] = 1
            data[0] = owl.from_numpy(embed).trans()

            Hout = [None] * Tau
            Hout[0] = owl.zeros([N, 1])

            act_ig = [None] * Tau
            act_fg = [None] * Tau
            act_og = [None] * Tau
            act_ff = [None] * Tau

            C = [None] * Tau
            C[0] = owl.zeros([N, 1])
            Ym = [None] * Tau
            dY = [None] * Tau

            dBd = owl.zeros([model.Layers[2], 1])  #dY.sum(0)
            dWd = owl.zeros([model.Layers[1],
                             model.Layers[2]])  #Hout.transpose().dot(dY)
            dHout = [None] * Tau  #dY.dot(model.decoder_weights.transpose())

            ##### Forward pass #####
            # For each time step
            for t in range(1, Tau):
                prev[t] = Hout[t - 1]
                embed = np.zeros((K, 1))
                embed[sent[t]] = 1
                data[t] = owl.from_numpy(embed).trans()

                act_ig[t] = (model.ig_weight_data.trans() * data[t - 1] +
                             model.ig_weight_prev.trans() * prev[t] +
                             model.ig_weight_bias)
                act_fg[t] = (model.fg_weight_data.trans() * data[t - 1] +
                             model.fg_weight_prev.trans() * prev[t] +
                             model.fg_weight_bias)
                act_og[t] = (model.og_weight_data.trans() * data[t - 1] +
                             model.og_weight_prev.trans() * prev[t] +
                             model.og_weight_bias)
                act_ff[t] = (model.ff_weight_data.trans() * data[t - 1] +
                             model.ff_weight_prev.trans() * prev[t] +
                             model.ff_weight_bias)

                act_ig[t] = ele.sigm(act_ig[t])
                act_fg[t] = ele.sigm(act_fg[t])
                act_og[t] = ele.sigm(act_og[t])
                act_ff[t] = ele.tanh(act_ff[t])

                C[t] = ele.mult(act_ig[t], act_ff[t]) + ele.mult(
                    act_fg[t], C[t - 1])

                if tanhC_version:
                    Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
                else:
                    Hout[t] = ele.mult(act_og[t], C[t])
                Ym[t] = softmax(model.decoder_weights.trans() * Hout[t] +
                                model.decoder_bias)

                dY[t] = data[t] - Ym[t]
                dBd += dY[t] / batch_size
                dWd += Hout[t] * dY[t].trans() / batch_size
                dHout[t] = model.decoder_weights * dY[t]

                #print "Y_0[t]",Y_o[t]
                #print "Y_o[t][sent[t]]",Y_o[t][sent[t]]
                #print np.sum(output.to_numpy())
                # output = Ym[t].trans() * data[t]
                # sent_ll += math.log10( max(np.sum(output.to_numpy()),1e-20) )
            ##### Sentence log likelihood #####
            for t in range(1, Tau):
                output = Ym[t].trans() * data[t]
                sent_ll += math.log10(max(np.sum(output.to_numpy()), 1e-20))

            ##### Initialize gradient vectors #####
            sen_ig = [None] * Tau
            sen_fg = [None] * Tau
            sen_og = [None] * Tau
            sen_ff = [None] * Tau

            weight_update_ig_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_ig_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_ig_bias = owl.zeros([model.Layers[1], 1])

            weight_update_fg_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_fg_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_fg_bias = owl.zeros([model.Layers[1], 1])

            weight_update_og_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_og_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_og_bias = owl.zeros([model.Layers[1], 1])

            weight_update_ff_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_ff_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_ff_bias = owl.zeros([model.Layers[1], 1])

            dC = [None] * Tau
            for t in xrange(Tau):
                dC[t] = owl.zeros(C[t].shape)

            # Calculate the error and add it
            for t in reversed(range(1, len(sent))):
                #print "sent",sent
                #print "t",t
                if tanhC_version:
                    tanhCt = ele.tanh(C[t])
                    sen_og[t] = ele.mult(tanhCt, dHout[t])
                    dC[t] += ele.mult((1 - ele.mult(tanhCt, tanhCt)),
                                      ele.mult(act_og[t], dHout[t]))
                else:
                    sen_og[t] = ele.mult(C[t], dHout[t])
                    dC[t] += ele.mult(act_og[t], dHout[t])

                sen_fg[t] = owl.zeros([model.Layers[1], 1])
                if t > 0:
                    sen_fg[t] = ele.mult(C[t - 1], dC[t])
                    dC[t - 1] += ele.mult(act_fg[t], dC[t])  # gradient flows through the forget gate
                sen_ig[t] = ele.mult(act_ff[t], dC[t])
                sen_ff[t] = ele.mult(act_ig[t], dC[t])

                # backprop activation functions
                sen_ff[t] = ele.mult((1 - ele.mult(act_ff[t], act_ff[t])),
                                     sen_ff[t])
                sen_ig[t] = ele.mult(ele.mult(act_ig[t], (1.0 - act_ig[t])),
                                     sen_ig[t])
                sen_fg[t] = ele.mult(ele.mult(act_fg[t], (1.0 - act_fg[t])),
                                     sen_fg[t])
                sen_og[t] = ele.mult(ele.mult(act_og[t], (1.0 - act_og[t])),
                                     sen_og[t])

                # backprop matrix multiply (the forward pass used data[t - 1] and prev[t])
                weight_update_ig_data += data[t - 1] * sen_ig[t].trans()
                weight_update_ig_prev += prev[t] * sen_ig[t].trans()
                weight_update_ig_bias += sen_ig[t]

                weight_update_fg_data += data[t - 1] * sen_fg[t].trans()
                weight_update_fg_prev += prev[t] * sen_fg[t].trans()
                weight_update_fg_bias += sen_fg[t]

                weight_update_og_data += data[t - 1] * sen_og[t].trans()
                weight_update_og_prev += prev[t] * sen_og[t].trans()
                weight_update_og_bias += sen_og[t]

                weight_update_ff_data += data[t - 1] * sen_ff[t].trans()
                weight_update_ff_prev += prev[t] * sen_ff[t].trans()
                weight_update_ff_bias += sen_ff[t]

                if t > 1:
                    dHout[t - 1] += model.ig_weight_prev.trans() * sen_ig[t]
                    dHout[t - 1] += model.fg_weight_prev.trans() * sen_fg[t]
                    dHout[t - 1] += model.og_weight_prev.trans() * sen_og[t]
                    dHout[t - 1] += model.ff_weight_prev.trans() * sen_ff[t]

            # normalize the gradients
            # dWLSTM /= batch_size
            weight_update_ig_prev /= batch_size
            weight_update_ig_data /= batch_size
            weight_update_ig_bias /= batch_size

            weight_update_fg_prev /= batch_size
            weight_update_fg_data /= batch_size
            weight_update_fg_bias /= batch_size

            weight_update_og_prev /= batch_size
            weight_update_og_data /= batch_size
            weight_update_og_bias /= batch_size

            weight_update_ff_prev /= batch_size
            weight_update_ff_data /= batch_size
            weight_update_ff_bias /= batch_size

            # weight update
            model.ig_weight_prev += learning_rate * weight_update_ig_prev
            model.ig_weight_data += learning_rate * weight_update_ig_data
            model.ig_weight_bias += learning_rate * weight_update_ig_bias

            model.fg_weight_prev += learning_rate * weight_update_fg_prev
            model.fg_weight_data += learning_rate * weight_update_fg_data
            model.fg_weight_bias += learning_rate * weight_update_fg_bias

            model.og_weight_prev += learning_rate * weight_update_og_prev
            model.og_weight_data += learning_rate * weight_update_og_data
            model.og_weight_bias += learning_rate * weight_update_og_bias

            model.ff_weight_prev += learning_rate * weight_update_ff_prev
            model.ff_weight_data += learning_rate * weight_update_ff_data
            model.ff_weight_bias += learning_rate * weight_update_ff_bias

            model.decoder_weights += learning_rate * dWd
            model.decoder_bias += learning_rate * dBd

            # Print results
            epoch_ll += sent_ll
            # print(" Sentence %d LL: %f" % (sent_id, sent_ll))
        epoch_ent = epoch_ll * (-1) / words
        epoch_ppl = 10**epoch_ent
        cur_time = time.time()
        print("Epoch %d (alpha=%f) PPL=%f" %
              (epoch_id, learning_rate, epoch_ppl))
        print "  time consumed:", cur_time - last_time
        if last_ll > epoch_ll:
            learning_rate /= 2.0
        last_ll = epoch_ll
        last_time = cur_time
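
The gate algebra in the forward pass above condenses to a few lines. Below is a minimal NumPy sketch of one time step under the same equations; the dict W keyed by the model's field names is an assumption, and the tanhC_version=1 branch is shown:

import numpy as np

def sigm(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(x, h_prev, c_prev, W):
    # Gates, matching act_ig / act_fg / act_og / act_ff above (no peephole terms)
    ig = sigm(W['ig_data'].T.dot(x) + W['ig_prev'].T.dot(h_prev) + W['ig_bias'])
    fg = sigm(W['fg_data'].T.dot(x) + W['fg_prev'].T.dot(h_prev) + W['fg_bias'])
    og = sigm(W['og_data'].T.dot(x) + W['og_prev'].T.dot(h_prev) + W['og_bias'])
    ff = np.tanh(W['ff_data'].T.dot(x) + W['ff_prev'].T.dot(h_prev) + W['ff_bias'])
    c = ig * ff + fg * c_prev  # C[t] = ig .* ff + fg .* C[t-1]
    h = og * np.tanh(c)        # Hout[t]
    return h, c
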
Exemple #24
0
 def bp(self, y, phase):
     if phase == "TRAIN":
         return ele.mult(y, self.dropmask) * self.scale
     else:
         return y
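
This bp pairs with the corresponding dropout ff: the same mask and scale are applied in both directions, and data passes through untouched outside of training. A minimal NumPy sketch of the pair, assuming scale = 1 / keep_ratio (inverted dropout):

import numpy as np

class Dropout(object):
    def __init__(self, keep_ratio):
        self.keep_ratio = keep_ratio
        self.scale = 1.0 / keep_ratio  # assumed relation between the two fields

    def ff(self, x, phase):
        if phase == "TRAIN":
            # Bernoulli mask with P(keep) = keep_ratio (owl.randb is assumed to behave likewise)
            self.dropmask = np.random.rand(*x.shape) < self.keep_ratio
            return x * self.dropmask * self.scale
        return x

    def bp(self, y, phase):
        if phase == "TRAIN":
            return y * self.dropmask * self.scale
        return y
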
Exemple #25
0
def LSTM_train(model, sents, words, learning_rate, EPOCH, tanhC_version=1):

    # Constants
    N = model.Layers[1]  # Number of units
    K = model.Layers[2]  # Vocabulary size

    last_time = time.time()
    # For each epoch
    for epoch_id in range(1, EPOCH + 1):
        epoch_ll = 0
        # For each sentence
        for sent_id, sent in enumerate(sents):
            #print sent_id
            #print "sent", sent
            #print "sents", sents
            ##### Initialize activations #####

            Tau = len(sent)
            sent_ll = 0  # Sentence log likelihood

            data = [None] * Tau

            Hout = [None] * Tau
            Hout[0] = owl.zeros([N, 1])

            act_ig = [None] * Tau
            act_fg = [None] * Tau
            act_og = [None] * Tau
            act_ff = [None] * Tau

            C = [None] * Tau
            C[0] = owl.zeros([N, 1])
            dY = [None] * Tau

            dBd = owl.zeros([model.Layers[2], 1])  #dY.sum(0)
            dWd = owl.zeros([model.Layers[2], model.Layers[1]])
            dHout = [None] * Tau  #dY.dot(model.decoder_weights.transpose())
            dEmb = [None] * Tau

            ##### Forward pass #####
            # For each time step

            for t in range(1, Tau):
                # predict the t'th word from the (t-1)'th word
                data[t] = model.emb_weight[sent[t - 1]]
                NVector = np.zeros((K, 1))
                NVector[sent[t]] = 1
                target = owl.from_numpy(NVector).trans()

                act_ig[t] = (model.ig_weight_data * data[t] +
                             model.ig_weight_prev * Hout[t - 1] +
                             model.ig_weight_cell * C[t - 1] +
                             model.ig_weight_bias)
                act_ig[t] = ele.sigm(act_ig[t])

                act_fg[t] = (model.fg_weight_data * data[t] +
                             model.fg_weight_prev * Hout[t - 1] +
                             model.fg_weight_cell * C[t - 1] +
                             model.fg_weight_bias)
                act_fg[t] = ele.sigm(act_fg[t])

                act_ff[t] = (model.ff_weight_data * data[t] +
                             model.ff_weight_prev * Hout[t - 1] +
                             model.ff_weight_bias)
                act_ff[t] = ele.tanh(act_ff[t])

                C[t] = ele.mult(act_ig[t], act_ff[t]) + ele.mult(act_fg[t], C[t - 1])

                act_og[t] = (model.og_weight_data * data[t] +
                             model.og_weight_prev * Hout[t - 1] +
                             model.og_weight_cell * C[t] +
                             model.og_weight_bias)
                act_og[t] = ele.sigm(act_og[t])

                if tanhC_version:
                    Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
                else:
                    Hout[t] = ele.mult(act_og[t], C[t])

                Y = softmax(model.decoder_weights * Hout[t] +
                            model.decoder_bias)

                # BP to Hout
                dY[t] = Y - target
                dBd += dY[t]
                dWd += dY[t] * Hout[t].trans()
                dHout[t] = model.decoder_weights.trans() * dY[t]

                # evaluation
                output = Y.to_numpy()  # a single element can be read from Y directly
                # print output[0, sent[t]]
                sent_ll += math.log(max(output[0, sent[t]], 1e-20), 2)

                #print "Y_0[t]",Y_o[t]
                #print "Y_o[t][sent[t]]",Y_o[t][sent[t]]
                #print np.sum(output.to_numpy())
                # output = Ym[t].trans() * data[t]
                # sent_ll += math.log10( max(np.sum(output.to_numpy()),1e-20) )
            ##### Initialize gradient vectors #####

            weight_update_ig_data = owl.zeros(
                [model.Layers[1], model.Layers[0]])
            weight_update_ig_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_ig_cell = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_ig_bias = owl.zeros([model.Layers[1], 1])

            weight_update_fg_data = owl.zeros(
                [model.Layers[1], model.Layers[0]])
            weight_update_fg_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_fg_cell = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_fg_bias = owl.zeros([model.Layers[1], 1])

            weight_update_og_data = owl.zeros(
                [model.Layers[1], model.Layers[0]])
            weight_update_og_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_og_cell = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_og_bias = owl.zeros([model.Layers[1], 1])

            weight_update_ff_data = owl.zeros(
                [model.Layers[1], model.Layers[0]])
            weight_update_ff_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_ff_bias = owl.zeros([model.Layers[1], 1])

            dC = [None] * Tau

            for t in xrange(Tau):
                dC[t] = owl.zeros(C[t].shape)

            # Calculate the error and add it
            for t in reversed(range(1, Tau)):
                #print "sent",sent
                #print "t",t

                # BP from og controled gate and og
                if tanhC_version:
                    tanhC = ele.tanh(C[t])
                    dTanhC = ele.mult(dHout[t], act_og[t])
                    sen_og = ele.mult(dHout[t], tanhC)
                    dC[t] += ele.mult((1 - ele.mult(tanhC, tanhC)), dTanhC)
                else:
                    sen_og = ele.mult(C[t], dHout[t])
                    dC[t] += ele.mult(act_og[t], dHout[t])

                # BP from og
                sen_og = ele.mult(ele.mult(act_og[t], (1.0 - act_og[t])),
                                  sen_og)
                if t > 1:
                    # accumulate: dHout[t - 1] already holds the decoder gradient
                    dHout[t - 1] += model.og_weight_prev.trans() * sen_og
                else:
                    dHout[t - 1] = model.og_weight_prev.trans() * sen_og
                dC[t] += model.og_weight_cell.trans() * sen_og
                dEmb[t] = model.og_weight_data.trans() * sen_og

                # BP from fg controled gate
                sen_fg = ele.mult(C[t - 1], dC[t])
                dC[t - 1] += ele.mult(act_fg[t], dC[t])

                # BP from ig controled gate
                sen_ig = ele.mult(act_ff[t], dC[t])
                sen_ff = ele.mult(act_ig[t], dC[t])
                sen_ff = ele.mult((1 - ele.mult(act_ff[t], act_ff[t])), sen_ff)
                dEmb[t] += model.ff_weight_data.trans() * sen_ff

                # BP from fg
                sen_fg = ele.mult(ele.mult(act_fg[t], (1.0 - act_fg[t])),
                                  sen_fg)
                dHout[t - 1] += model.fg_weight_prev.trans() * sen_fg
                dC[t - 1] += model.fg_weight_cell.trans() * sen_fg
                dEmb[t] += model.fg_weight_data.trans() * sen_fg

                # BP from ig
                sen_ig = ele.mult(ele.mult(act_ig[t], (1.0 - act_ig[t])),
                                  sen_ig)
                dHout[t - 1] += model.ig_weight_prev.trans() * sen_ig
                dC[t - 1] += model.ig_weight_cell.trans() * sen_ig
                dEmb[t] += model.ig_weight_data.trans() * sen_ig

                # derivatives on weight matrix and bias
                weight_update_ig_data += sen_ig * data[t].trans()
                weight_update_ig_prev += sen_ig * Hout[t - 1].trans()
                weight_update_ig_cell += sen_ig * C[t - 1].trans()
                weight_update_ig_bias += sen_ig

                weight_update_fg_data += sen_fg * data[t].trans()
                weight_update_fg_prev += sen_fg * Hout[t - 1].trans()
                weight_update_fg_cell += sen_fg * C[t - 1].trans()
                weight_update_fg_bias += sen_fg

                weight_update_og_data += sen_og * data[t].trans()
                weight_update_og_prev += sen_og * Hout[t - 1].trans()
                weight_update_og_cell += sen_og * C[t].trans()
                weight_update_og_bias += sen_og

                weight_update_ff_data += sen_ff * data[t].trans()
                weight_update_ff_prev += sen_ff * Hout[t - 1].trans()
                weight_update_ff_bias += sen_ff

            # normalize the gradients
            rate = learning_rate / Tau

            # weight update
            model.ig_weight_prev -= rate * weight_update_ig_prev
            model.ig_weight_data -= rate * weight_update_ig_data
            model.ig_weight_cell -= rate * weight_update_ig_cell
            model.ig_weight_bias -= rate * weight_update_ig_bias

            model.fg_weight_prev -= rate * weight_update_fg_prev
            model.fg_weight_data -= rate * weight_update_fg_data
            model.fg_weight_cell -= rate * weight_update_fg_cell
            model.fg_weight_bias -= rate * weight_update_fg_bias

            model.og_weight_prev -= rate * weight_update_og_prev
            model.og_weight_data -= rate * weight_update_og_data
            model.og_weight_cell -= rate * weight_update_og_cell
            model.og_weight_bias -= rate * weight_update_og_bias

            model.ff_weight_prev -= rate * weight_update_ff_prev
            model.ff_weight_data -= rate * weight_update_ff_data
            model.ff_weight_bias -= rate * weight_update_ff_bias

            model.decoder_weights -= rate * dWd
            model.decoder_bias -= rate * dBd

            for t in range(1, Tau):
                model.emb_weight[sent[t - 1]] -= rate * dEmb[t]

            # Print results
            epoch_ll += sent_ll
            # print(" Sentence %d LL: %f" % (sent_id, sent_ll))

        epoch_ent = epoch_ll * (-1) / words
        epoch_ppl = 2**epoch_ent
        cur_time = time.time()
        print("Epoch %d (alpha=%f) PPL=%f" %
              (epoch_id, learning_rate, epoch_ppl))
        print "  time consumed:", cur_time - last_time
        last_time = cur_time

    return model, learning_rate
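
The epoch bookkeeping above turns the accumulated log likelihood into perplexity. This variant sums base-2 logs and exponentiates with 2, while the earlier LSTM_train (Exemple #23) sums base-10 logs and uses 10; a one-line helper captures the relation:

def perplexity(log_likelihood, words, base=2):
    # PPL = base ** (-LL / words); LL must be summed in the same log base
    return base ** (-log_likelihood / float(words))
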
Exemple #26
0
 def bp(self, y):
     return ele.mult(y, self.dropmask) * self.scale
     '''
Exemple #27
0
def train_one_mb(model, data, label, weightsgrad, biasgrad, dropout_rate):
    num_samples = data.shape[-1]
    num_layers = 20
    acts = [None] * num_layers
    sens = [None] * num_layers
    # FF
    acts[0] = data
    acts1 = conv_forward(acts[0], model.weights[0], model.bias[0], model.conv_infos[0])
    acts[1] = ele.relu(acts1)  # conv1
    acts[2] = pooling_forward(acts[1], model.pooling_infos[0])  # pool1
    acts3 = conv_forward(acts[2], model.weights[1], model.bias[1], model.conv_infos[1])
    acts[3] = ele.relu(acts3)  # conv2
    acts[4] = pooling_forward(acts[3], model.pooling_infos[1])  # pool2
    acts5 = conv_forward(acts[4], model.weights[2], model.bias[2], model.conv_infos[2])
    acts[5] = ele.relu(acts5)  # conv3
    acts6 = conv_forward(acts[5], model.weights[3], model.bias[3], model.conv_infos[3])
    acts[6] = ele.relu(acts6)  # conv4
    acts7 = conv_forward(acts[6], model.weights[4], model.bias[4], model.conv_infos[4])
    acts[7] = ele.relu(acts7)  # conv5
    acts[8] = pooling_forward(acts[7], model.pooling_infos[2])  # pool5
    re_acts8 = acts[8].reshape([np.prod(acts[8].shape[0:3]), num_samples])
    acts9 = model.weights[5] * re_acts8 + model.bias[5]
    acts[9] = ele.relu(acts9)  # fc6
    mask6 = owl.randb(acts[9].shape, dropout_rate)
    acts[9] = ele.mult(acts[9], mask6)  # drop6
    acts10 = model.weights[6] * acts[9] + model.bias[6]
    acts[10] = ele.relu(acts10)  # fc7
    mask7 = owl.randb(acts[10].shape, dropout_rate)
    acts[10] = ele.mult(acts[10], mask7)  # drop7
    acts[11] = model.weights[7] * acts[10] + model.bias[7]  # fc8
    acts[12] = softmax_forward(acts[11].reshape([1000, 1, 1, num_samples]),
                               soft_op.instance).reshape([1000, num_samples])  # prob

    # error
    sens[11] = acts[12] - label

    # BP
    sens[10] = model.weights[7].trans() * sens[11]  # fc8
    sens[10] = ele.mult(sens[10], mask7)  # drop7
    sens[10] = ele.relu_back(sens[10], acts[10], acts10)  # relu7
    sens[9] = model.weights[6].trans() * sens[10]
    sens[9] = ele.mult(sens[9], mask6)  # drop6
    sens[9] = ele.relu_back(sens[9], acts[9], acts9)  # relu6
    sens[8] = (model.weights[5].trans() * sens[9]).reshape(
        acts[8].shape)  # fc6
    sens[7] = pooling_backward(sens[8], acts[8], acts[7],
                               model.pooling_infos[2])  # pool5
    sens[7] = ele.relu_back(sens[7], acts[7], acts7)  # relu5
    sens[6] = conv_backward_data(sens[7], model.weights[4],
                                 model.conv_infos[4])  # conv5
    sens[6] = ele.relu_back(sens[6], acts[6], acts6)  # relu4
    sens[5] = conv_backward_data(sens[6], model.weights[3],
                                 model.conv_infos[3])  # conv4
    sens[5] = ele.relu_back(sens[5], acts[5], acts5)  # relu3
    sens[4] = conv_backward_data(sens[5], model.weights[2],
                                 model.conv_infos[2])  # conv3
    sens[3] = pooling_backward(sens[4], acts[4], acts[3],
                               model.pooling_infos[1])  # pool2
    sens[3] = ele.relu_back(sens[3], acts[3], acts3)  # relu2
    sens[2] = conv_backward_data(sens[3], model.weights[1],
                                 model.conv_infos[1])  # conv2
    sens[1] = pooling_backward(sens[2], acts[2], acts[1],
                               model.pooling_infos[0])  # pool1
    sens[1] = ele.relu_back(sens[1], acts[1], acts1)  # relu1

    weightsgrad[7] = sens[11] * acts[10].trans()
    weightsgrad[6] = sens[10] * acts[9].trans()
    weightsgrad[5] = sens[9] * re_acts8.trans()
    weightsgrad[4] = conv_backward_filter(sens[7], acts[6],
                                          model.conv_infos[4])
    weightsgrad[3] = conv_backward_filter(sens[6], acts[5],
                                          model.conv_infos[3])
    weightsgrad[2] = conv_backward_filter(sens[5], acts[4],
                                          model.conv_infos[2])
    weightsgrad[1] = conv_backward_filter(sens[3], acts[2],
                                          model.conv_infos[1])
    weightsgrad[0] = conv_backward_filter(sens[1], acts[0],
                                          model.conv_infos[0])
    biasgrad[7] = sens[11].sum(1)
    biasgrad[6] = sens[10].sum(1)
    biasgrad[5] = sens[9].sum(1)
    biasgrad[4] = conv_backward_bias(sens[7])
    biasgrad[3] = conv_backward_bias(sens[6])
    biasgrad[2] = conv_backward_bias(sens[5])
    biasgrad[1] = conv_backward_bias(sens[3])
    biasgrad[0] = conv_backward_bias(sens[1])
    return acts[12]
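
Note that this version calls ele.relu_back with three arguments, passing the pre-activation (acts1, acts9, ...) alongside the activation, where other versions pass only two. Assuming the usual ReLU derivative, a NumPy sketch of what the three-argument form would compute:

import numpy as np

def relu_back(sens, act, before_act):
    # Pass the gradient where the ReLU input was positive, zero it elsewhere.
    # Under this assumed definition act is redundant; it is kept only to
    # mirror the three-argument call above.
    return sens * (before_act > 0)
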
Exemple #28
0
def train_one_mb(model, data, label, weightsgrad, biasgrad):
    # Note: Python lists are passed by reference, so weightsgrad and biasgrad are filled in place
    acts = [None] * model.num_layers
    sens = [None] * model.num_layers
    beforeacts = [None] * model.num_layers
    beforedropout = [None] * model.num_layers
    dropoutmask = [None] * model.num_layers
    before2fullyact = []
    conv2fullylayer = model.num_layers

    acts[0] = data

    num_samples = data.shape[-1]
    num_class = label.shape[0]

    #find the reshape layer
    for i in range(0, model.num_layers - 1):
        #if from conv 2 fully
        if (i < model.num_layers - 2) and (
                model.ff_infos[i]['ff_type'] == 'conv'
                or model.ff_infos[i]['ff_type'] == 'pooling') and (
                    model.ff_infos[i + 1]['ff_type'] == 'fully'):
            conv2fullylayer = i + 1
            break

    for i in range(0, model.num_layers - 1):
        if model.ff_infos[i]['ff_type'] == 'conv':
            #print '%d conv ff' % (i)
            beforeacts[i + 1] = conv_forward(acts[i], model.weights[i],
                                             model.bias[i],
                                             model.ff_infos[i]['conv_info'])
        elif model.ff_infos[i]['ff_type'] == 'pooling':
            #print '%d pooling ff' % (i)
            beforeacts[i + 1] = pooling_forward(
                acts[i], model.ff_infos[i]['pooling_info'])
        else:
            #print '%d fully ff' % (i)
            beforeacts[i + 1] = model.weights[i] * acts[i] + model.bias[i]

        #activation function
        if model.ff_infos[i]['neuron_type'] == 'RELU':
            #print '%d relu ff' % (i)
            acts[i + 1] = ele.relu(beforeacts[i + 1])
        elif model.ff_infos[i]['neuron_type'] == 'SOFTMAX':
            #print '%d softmax ff' % (i)
            acts[i + 1] = softmax_forward(
                beforeacts[i + 1].reshape([num_class, 1, 1, num_samples]),
                soft_op.instance).reshape([num_class, num_samples])  # prob
        else:
            #print '%d linear ff' % (i)
            acts[i + 1] = beforeacts[i + 1]

        #dropout
        beforedropout[i + 1] = acts[i + 1]
        if model.ff_infos[i]['dropout_rate'] > 0:
            #print '%d dropout ff' % (i)
            dropoutmask[i + 1] = owl.randb(acts[i + 1].shape,
                                           model.ff_infos[i]['dropout_rate'])
            acts[i + 1] = ele.mult(beforedropout[i + 1], dropoutmask[i + 1])

        if i + 1 == conv2fullylayer:
            before2fullyact = acts[i + 1]
            acts[i + 1] = before2fullyact.reshape(
                [np.prod(before2fullyact.shape[0:3]), num_samples])

    # error
    sens[model.num_layers - 1] = acts[model.num_layers - 1] - label

    #bp
    for i in range(model.num_layers - 1, 0, -1):
        if model.ff_infos[i - 1]['ff_type'] == 'conv':
            sens[i - 1] = conv_backward_data(
                sens[i], model.weights[i - 1],
                model.ff_infos[i - 1]['conv_info'])
        elif model.ff_infos[i - 1]['ff_type'] == 'pooling':
            if i == conv2fullylayer:
                sens[i - 1] = pooling_backward(
                    sens[i].reshape(before2fullyact.shape), before2fullyact,
                    acts[i - 1], model.ff_infos[i - 1]['pooling_info'])
            else:
                sens[i - 1] = pooling_backward(
                    sens[i], acts[i], acts[i - 1],
                    model.ff_infos[i - 1]['pooling_info'])
        else:
            sens[i - 1] = model.weights[i - 1].trans() * sens[i]

        if i - 2 >= 0:
            #dropout
            if model.ff_infos[i - 2]['dropout_rate'] > 0:
                sens[i - 1] = ele.mult(sens[i - 1], dropoutmask[i - 1])

            #backact
            if model.ff_infos[i - 2]['neuron_type'] == 'RELU':
                sens[i - 1] = ele.relu_back(sens[i - 1], beforedropout[i - 1],
                                            beforeacts[i - 1])
            # else: linear neuron, the gradient passes through unchanged

    #gradient
    for i in range(0, model.num_layers - 1):
        if model.ff_infos[i]['ff_type'] == 'conv':
            weightsgrad[i] = conv_backward_filter(
                sens[i + 1], acts[i], model.ff_infos[i]['conv_info'])
            biasgrad[i] = conv_backward_bias(sens[i + 1])
        elif model.ff_infos[i]['ff_type'] == 'fully':
            weightsgrad[i] = sens[i + 1] * acts[i].trans()
            biasgrad[i] = sens[i + 1].sum(1)
        else:
            continue
    return acts[model.num_layers - 1]
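
The whole FF/BP loop above is driven by model.ff_infos. Below is a hypothetical entry list consistent with the keys the code reads (ff_type, conv_info / pooling_info, neuron_type, dropout_rate); the None payloads stand in for real owl conv/pooling descriptors:

ff_infos = [
    {'ff_type': 'conv', 'conv_info': None, 'neuron_type': 'RELU', 'dropout_rate': 0.0},
    {'ff_type': 'pooling', 'pooling_info': None, 'neuron_type': 'LINEAR', 'dropout_rate': 0.0},
    {'ff_type': 'fully', 'neuron_type': 'RELU', 'dropout_rate': 0.5},
    {'ff_type': 'fully', 'neuron_type': 'SOFTMAX', 'dropout_rate': 0.0},
]
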
Exemple #29
0
 def getloss(self):
     ''' Get the loss of the softmax (cross entropy)
     '''
     lossmat = ele.mult(ele.ln(self.ff_y), self.y)
     res = lossmat.sum(0).sum(1).to_numpy()
     return -res[0][0] / lossmat.shape[1]
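
A NumPy restatement of the same quantity can serve as a sanity check (classes along axis 0, minibatch samples along axis 1, ff_y already softmax-normalized):

import numpy as np

def cross_entropy(ff_y, y):
    # Minibatch mean of -sum_k y_k * ln(p_k)
    return -np.sum(y * np.log(ff_y)) / ff_y.shape[1]
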
Exemple #30
0
def train_one_mb(model, data, label, weightsgrad, biasgrad, dropout_rate):
    num_samples = data.shape[-1]
    num_layers = 20
    acts = [None] * num_layers
    sens = [None] * num_layers
    # FF
    acts[0] = data
    acts1 = conv_forward(acts[0], model.weights[0], model.bias[0], model.conv_infos[0])
    acts[1] = ele.relu(acts1)#(conv_forward(acts[0], model.weights[0], model.bias[0], model.conv_infos[0])) # conv1
    acts[2] = pooling_forward(acts[1], model.pooling_infos[0]) # pool1
    acts3 = conv_forward(acts[2], model.weights[1], model.bias[1], model.conv_infos[1]) # conv2
    acts[3] = ele.relu(acts3)#(conv_forward(acts[2], model.weights[1], model.bias[1], model.conv_infos[1])) # conv2
    acts[4] = pooling_forward(acts[3], model.pooling_infos[1]) # pool2
    acts5 = conv_forward(acts[4], model.weights[2], model.bias[2], model.conv_infos[2]) # conv3
    acts[5] = ele.relu(acts5)#(conv_forward(acts[4], model.weights[2], model.bias[2], model.conv_infos[2])) # conv3
    acts6 = conv_forward(acts[5], model.weights[3], model.bias[3], model.conv_infos[3]) # conv4
    acts[6] = ele.relu(acts6)#(conv_forward(acts[5], model.weights[3], model.bias[3], model.conv_infos[3])) # conv4
    acts7 = conv_forward(acts[6], model.weights[4], model.bias[4], model.conv_infos[4]) # conv5
    acts[7] = ele.relu(acts7)#(conv_forward(acts[6], model.weights[4], model.bias[4], model.conv_infos[4])) # conv5
    acts[8] = pooling_forward(acts[7], model.pooling_infos[2]) # pool5
    re_acts8 = acts[8].reshape([np.prod(acts[8].shape[0:3]), num_samples])
    acts9 = model.weights[5] * re_acts8 + model.bias[5] # fc6
    acts[9] = ele.relu(acts9)#(model.weights[5] * re_acts8 + model.bias[5]) # fc6
    mask6 = owl.randb(acts[9].shape, dropout_rate)
    acts[9] = ele.mult(acts[9], mask6) # drop6
    acts10 = model.weights[6] * acts[9] + model.bias[6] # fc7
    acts[10] = ele.relu(acts10)#(model.weights[6] * acts[9] + model.bias[6]) # fc7
    mask7 = owl.randb(acts[10].shape, dropout_rate)
    acts[10] = ele.mult(acts[10], mask7) # drop7
    acts[11] = model.weights[7] * acts[10] + model.bias[7] # fc8
    acts[12] = softmax_forward(acts[11].reshape([1000, 1, 1, num_samples]), soft_op.instance).reshape([1000, num_samples]) # prob
    # error
    sens[11] = acts[12] - label
    # BP
    sens[10] = model.weights[7].trans() * sens[11] # fc8
    sens[10] = ele.mult(sens[10], mask7) # drop7
    sens[10] = ele.relu_back(sens[10], acts[10]) # relu7
    sens[9] = model.weights[6].trans() * sens[10]
    sens[9] = ele.mult(sens[9], mask6) # drop6
    sens[9] = ele.relu_back(sens[9], acts[9]) # relu6
    sens[8] = (model.weights[5].trans() * sens[9]).reshape(acts[8].shape) # fc6
    sens[7] = pooling_backward(sens[8], acts[8], acts[7], model.pooling_infos[2]) # pool5
    sens[7] = ele.relu_back(sens[7], acts[7]) # relu5
    sens[6] = conv_backward_data(sens[7], model.weights[4], model.conv_infos[4]) # conv5
    sens[6] = ele.relu_back(sens[6], acts[6]) # relu4
    sens[5] = conv_backward_data(sens[6], model.weights[3], model.conv_infos[3]) # conv4
    sens[5] = ele.relu_back(sens[5], acts[5]) # relu3
    sens[4] = conv_backward_data(sens[5], model.weights[2], model.conv_infos[2]) # conv3
    sens[3] = pooling_backward(sens[4], acts[4], acts[3], model.pooling_infos[1]) # pool2
    sens[3] = ele.relu_back(sens[3], acts[3]) # relu2
    sens[2] = conv_backward_data(sens[3], model.weights[1], model.conv_infos[1]) # conv2
    sens[1] = pooling_backward(sens[2], acts[2], acts[1], model.pooling_infos[0]) # pool1
    sens[1] = ele.relu_back(sens[1], acts[1]) # relu1
    weightsgrad[7] = sens[11] * acts[10].trans()
    weightsgrad[6] = sens[10] * acts[9].trans()
    weightsgrad[5] = sens[9] * re_acts8.trans()
    weightsgrad[4] = conv_backward_filter(sens[7], acts[6], model.conv_infos[4])
    weightsgrad[3] = conv_backward_filter(sens[6], acts[5], model.conv_infos[3])
    weightsgrad[2] = conv_backward_filter(sens[5], acts[4], model.conv_infos[2])
    weightsgrad[1] = conv_backward_filter(sens[3], acts[2], model.conv_infos[1])
    weightsgrad[0] = conv_backward_filter(sens[1], acts[0], model.conv_infos[0])
    biasgrad[7] = sens[11].sum(1)
    biasgrad[6] = sens[10].sum(1)
    biasgrad[5] = sens[9].sum(1)
    biasgrad[4] = conv_backward_bias(sens[7])
    biasgrad[3] = conv_backward_bias(sens[6])
    biasgrad[2] = conv_backward_bias(sens[5])
    biasgrad[1] = conv_backward_bias(sens[3])
    biasgrad[0] = conv_backward_bias(sens[1])
    return acts[12]
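
The training loops above hand the returned probabilities to print_training_accuracy(acts[12], target, num_samples), which is not defined in these excerpts. A sketch of plausible semantics, comparing argmax rows; the [num_classes, num_samples] layout is an assumption:

import numpy as np

def print_training_accuracy(probs, label, num_samples):
    # probs and label assumed to be [num_classes, num_samples] arrays
    predict = np.argmax(probs, axis=0)
    ground_truth = np.argmax(label, axis=0)
    correct = np.sum(predict == ground_truth)
    print("training accuracy: %f" % (correct * 1.0 / num_samples))
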