Example #1
	def predict(self,X_test,y_test,number_of_batches_to_test=10):

		batch_size, timesteps, input_dim = X_test.shape
		size_of_hidden_layers=self.size_of_hidden_layers
		X=X_test

		f=np.zeros((batch_size, timesteps, size_of_hidden_layers))
		i=np.zeros((batch_size, timesteps, size_of_hidden_layers))
		o=np.zeros((batch_size, timesteps, size_of_hidden_layers))
		c=np.zeros((batch_size, timesteps, size_of_hidden_layers))
		g=np.zeros((batch_size, timesteps, size_of_hidden_layers))
		h=np.zeros((batch_size, timesteps+1, size_of_hidden_layers))

		output=np.zeros((batch_size, timesteps, input_dim))
		pre_output=np.zeros_like(output)

		h[:,-1] = np.zeros((batch_size, size_of_hidden_layers))

		# Given the learned weight matrices we now perform one final forward
		# pass on the new test data

		for t in range(timesteps):

			# Use the previous hidden state h[:,t-1], as in the training loop
			f[:,t]=sigmoid(X[:,t].dot(self.Uf.T)+h[:,t-1].dot(self.Wf))
			i[:,t]=sigmoid(X[:,t].dot(self.Ui.T)+h[:,t-1].dot(self.Wi))
			o[:,t]=tanh(X[:,t].dot(self.Uo.T)+h[:,t-1].dot(self.Wo))
			g[:,t]=sigmoid(X[:,t].dot(self.Uc.T)+h[:,t-1].dot(self.Wc))
			c[:,t]=f[:,t]*c[:,t-1]+i[:,t]*g[:,t]

			h[:,t]=o[:,t]*tanh(c[:,t])

			pre_output[:,t]=h[:,t].dot(self.Wz)
			output[:,t]=softmax(h[:,t].dot(self.Wz))

		print ("Results:")

		for k in range(number_of_batches_to_test):
			tmp_X = np.argmax(X_test[k], axis=1)
			tmp_y1 = np.argmax(y_test[k], axis=1)
			tmp_y2 = np.argmax(output[k], axis=1)

			print('\n')
			print("input number " + str(k) + " :	")
			print("X      = [" + str(tmp_X) + "]")
			print("y_true = [" + str(tmp_y1) + "]")
			print("y_predicted = [" + str(tmp_y2) + "]")
Example #2
    def predict(self, X_test, y_test, number_of_batches_to_test=10):

        batch_size, timesteps, input_dim = X_test.shape
        size_of_hidden_layers = self.size_of_hidden_layers
        X = X_test

        s = np.zeros((batch_size, timesteps, size_of_hidden_layers))
        vst = np.zeros((batch_size, timesteps + 1, size_of_hidden_layers))
        o = np.zeros((batch_size, timesteps, input_dim))

        s[:, -1] = np.zeros((batch_size, size_of_hidden_layers))

        output = np.zeros((batch_size, timesteps, input_dim))

        # Given the learned weight matrices we now perform one final forward
        # pass on the new test data

        for t in range(timesteps):
            #s[i]=np.tanh(from_input_to_hidden_state.dot(x[i-1])+from_hidden_state_to_hidden_state.dot(s[i-1]))
            s[:, t] = X[:, t].dot(
                self.from_input_to_hidden_state.T) + s[:, t - 1].dot(
                    self.from_hidden_state_to_hidden_state.T)
            #vst[i]=from_hidden_state_to_output.T.dot(s[i])
            vst[:, t] = tanh(s[:, t])
            o[:, t] = vst[:, t].dot(self.from_hidden_state_to_output.T)
            output[:, t] = softmax(o[:, t])

        print("Results:")

        for i in range(number_of_batches_to_test):
            tmp_X = np.argmax(X_test[i], axis=1)
            tmp_y1 = np.argmax(y_test[i], axis=1)
            tmp_y2 = np.argmax(output[i], axis=1)

            print('\n')
            print("input number " + str(i) + " :	")
            print("X      = [" + str(tmp_X) + "]")
            print("y_true = [" + str(tmp_y1) + "]")
            print("y_predicted = [" + str(tmp_y2) + "]")
Example #3
    def predict(self, X_test, y_test, number_of_batches_to_test=10):

        batch_size, timesteps, input_dim = X_test.shape
        size_of_hidden_layers = self.size_of_hidden_layers
        X = X_test

        r = np.zeros((batch_size, timesteps, size_of_hidden_layers))
        sor = np.zeros((batch_size, timesteps, size_of_hidden_layers))
        z = np.zeros((batch_size, timesteps, size_of_hidden_layers))
        ht = np.zeros((batch_size, timesteps, size_of_hidden_layers))
        h = np.zeros((batch_size, timesteps + 1, size_of_hidden_layers))
        output = np.zeros((batch_size, timesteps, input_dim))

        # Performing one last forward propagation for the final prediction

        for t in range(timesteps):
            r[:, t] = sigmoid(X[:, t].dot(self.Ur.T) + h[:, t - 1].dot(self.Wr))
            z[:, t] = sigmoid(X[:, t].dot(self.Uz.T) + h[:, t - 1].dot(self.Wz))
            sor[:, t] = h[:, t - 1] * r[:, t]
            ht[:, t] = tanh(X[:, t].dot(self.Uh.T) + sor[:, t].dot(self.Wh))
            h[:, t] = (1 - z[:, t]) * ht[:, t] + z[:, t] * h[:, t - 1]

            output[:, t] = softmax(h[:, t].dot(self.Wo))

        print("Results:")

        for i in range(number_of_batches_to_test):
            tmp_X = np.argmax(X_test[i], axis=1)
            tmp_y1 = np.argmax(y_test[i], axis=1)
            tmp_y2 = np.argmax(output[i], axis=1)

            print('\n')
            print("input number " + str(i) + " :	")
            print("X      = [" + str(tmp_X) + "]")
            print("y_true = [" + str(tmp_y1) + "]")
            print("y_predicted = [" + str(tmp_y2) + "]")
Example #4
    def forward_and_backwardpropagation(self, X_train, y_train):
        batch_size, timesteps, input_dim = X_train.shape
        size_of_hidden_layers = self.size_of_hidden_layers
        X = X_train

        ## Initializing the weight matrices according to some external material

        self.from_input_to_hidden_state = np.random.uniform(
            -1. / np.sqrt(input_dim), 1. / np.sqrt(input_dim),
            ((size_of_hidden_layers, input_dim)))
        self.from_hidden_state_to_hidden_state = np.random.uniform(
            -1. / np.sqrt(size_of_hidden_layers),
            1. / np.sqrt(size_of_hidden_layers),
            (size_of_hidden_layers, size_of_hidden_layers))
        self.from_hidden_state_to_output = np.random.uniform(
            -1. / np.sqrt(size_of_hidden_layers),
            1. / np.sqrt(size_of_hidden_layers),
            (input_dim, size_of_hidden_layers))

        # Beginning of the forward propagation for a certain number of epochs

        for i in range(self.epochs):
            s = np.zeros((batch_size, timesteps, size_of_hidden_layers))
            vst = np.zeros((batch_size, timesteps + 1, size_of_hidden_layers))
            o = np.zeros((batch_size, timesteps, input_dim))
            output = np.zeros((batch_size, timesteps, input_dim))

            s[:, -1] = np.zeros((batch_size, size_of_hidden_layers))

            for t in range(timesteps):

                ## Using the RNN equations
                #s[i]=np.tanh(from_input_to_hidden_state.dot(x[i-1])+from_hidden_state_to_hidden_state.dot(s[i-1]))
                s[:, t] = X[:, t].dot(
                    self.from_input_to_hidden_state.T) + s[:, t - 1].dot(
                        self.from_hidden_state_to_hidden_state.T)
                #vst[i]=from_hidden_state_to_output.T.dot(s[i])
                vst[:, t] = tanh(s[:, t])
                o[:, t] = vst[:, t].dot(self.from_hidden_state_to_output.T)
                output[:, t] = softmax(o[:, t])

            # Initializing the gradients which will hold the errors during backpropagation

            gradient_input_to_hidden = np.zeros_like(
                self.from_input_to_hidden_state)  #U
            gradient_hidden_to_hidden = np.zeros_like(
                self.from_hidden_state_to_hidden_state)  #W
            gradient_hidden_to_output = np.zeros_like(
                self.from_hidden_state_to_output)  #V

            ## Calculating the Cross Entropy loss and its derivative

            lossgradients = []
            for y_i, o_i in zip(y_train, output):
                lossgradients.append(lossCE.derivative(y_i, o_i))

            lossgradients = array(lossgradients)

            if not i % 100:
                print("LOSS:", np.mean(array(lossgradients)))

            for t in reversed(range(timesteps)):

                gradient_hidden_to_output += lossgradients[:, t].T.dot(vst[:, t])
                deltas = lossgradients[:, t].dot(
                    self.from_hidden_state_to_output) * tanh.prime(s[:, t])

                # Instead of going all the way back to timestep 0
                # we truncate our backpropagation
                for ti in reversed(np.arange(max(0, t - self.truncation), t + 1)):

                    gradient_input_to_hidden += deltas.T.dot(X[:, ti])
                    gradient_hidden_to_hidden += deltas.T.dot(vst[:, ti - 1])
                    # Calculate the gradient w.r.t. the previous state through the
                    # recurrent weight matrix
                    deltas = deltas.dot(
                        self.from_hidden_state_to_hidden_state) * tanh.prime(s[:, ti - 1])

            # Updating our Weight matrices
            self.from_input_to_hidden_state = SGD.update(
                self.from_input_to_hidden_state, gradient_input_to_hidden)
            self.from_hidden_state_to_hidden_state = SGD.update(
                self.from_hidden_state_to_hidden_state,
                gradient_hidden_to_hidden)
            self.from_hidden_state_to_output = SGD.update(
                self.from_hidden_state_to_output, gradient_hidden_to_output)
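
Example #4 trains the plain RNN with truncated backpropagation through time: the error at each timestep is pushed back at most self.truncation steps instead of all the way to t = 0. Below is a single-sample sketch of that accumulation, assuming post-tanh hidden states hs (with hs[-1] left at zero) and per-timestep output-layer error signals deltas_out; all names here are hypothetical, not the project's own.

import numpy as np

def truncated_bptt_grads(X, deltas_out, hs, U, W, V, truncation):
    # X: (T, D) inputs, hs: (T+1, H) hidden states with hs[-1] = 0,
    # deltas_out: (T, D_out) error signals at the output layer.
    dU, dW, dV = np.zeros_like(U), np.zeros_like(W), np.zeros_like(V)
    T = X.shape[0]
    for t in reversed(range(T)):
        dV += np.outer(deltas_out[t], hs[t])               # output-layer gradient
        delta = V.T.dot(deltas_out[t]) * (1 - hs[t] ** 2)  # back through the tanh
        for ti in reversed(range(max(0, t - truncation), t + 1)):
            dU += np.outer(delta, X[ti])
            dW += np.outer(delta, hs[ti - 1])
            delta = W.T.dot(delta) * (1 - hs[ti - 1] ** 2)  # one more step back in time
    return dU, dW, dV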
Example #5

def train_test_split(X, y, split_size=0.3):
    # Split the data into training and test sets; the first `split_size`
    # fraction of the samples is used for training
    """Written by Erik Linder-Noren """
    length = int(len(X) * split_size)
    X_train, X_test = X[:length], X[length:]
    y_train, y_test = y[:length], y[length:]

    return X_train, X_test, y_train, y_test
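
Note that split_size is the fraction kept for training (the first part of the arrays), so the default of 0.3 keeps only 30% of the samples for training. A small usage sketch with toy one-hot sequences:

import numpy as np

# Toy data: 100 one-hot sequences of length 10 over a vocabulary of 8 symbols.
X = np.eye(8)[np.random.randint(0, 8, size=(100, 10))]
y = np.roll(X, -1, axis=1)  # e.g. "predict the next symbol" targets

X_train, X_test, y_train, y_test = train_test_split(X, y, split_size=0.7)
print(X_train.shape, X_test.shape)  # (70, 10, 8) (30, 10, 8)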


lossCE = lossCE()
SGD = SGD()
softmax = softmax()
tanh = tanh_func()


class RNN:
    def __init__(self, size_of_hidden_layers=100, epochs=100, truncation=10):

        self.epochs = epochs
        self.size_of_hidden_layers = size_of_hidden_layers
        self.truncation = truncation

    def forward_and_backwardpropagation(self, X_train, y_train):
        batch_size, timesteps, input_dim = X_train.shape
        size_of_hidden_layers = self.size_of_hidden_layers
        X = X_train

        ## Initializing the weight matrices according to some external material

        self.Uz = np.random.uniform(-1. / np.sqrt(input_dim),
                                    1. / np.sqrt(input_dim),
                                    ((size_of_hidden_layers, input_dim)))
        self.Ur = np.random.uniform(-1. / np.sqrt(input_dim),
                                    1. / np.sqrt(input_dim),
                                    ((size_of_hidden_layers, input_dim)))
        self.Uh = np.random.uniform(-1. / np.sqrt(input_dim),
                                    1. / np.sqrt(input_dim),
                                    ((size_of_hidden_layers, input_dim)))

        self.Wz = np.random.uniform(
            -1. / np.sqrt(size_of_hidden_layers),
            1. / np.sqrt(size_of_hidden_layers),
            (size_of_hidden_layers, size_of_hidden_layers))
        self.Wr = np.random.uniform(
            -1. / np.sqrt(size_of_hidden_layers),
            1. / np.sqrt(size_of_hidden_layers),
            (size_of_hidden_layers, size_of_hidden_layers))
        self.Wh = np.random.uniform(
            -1. / np.sqrt(size_of_hidden_layers),
            1. / np.sqrt(size_of_hidden_layers),
            (size_of_hidden_layers, size_of_hidden_layers))

        self.Wo = np.random.uniform(-1. / np.sqrt(size_of_hidden_layers),
                                    1. / np.sqrt(size_of_hidden_layers),
                                    (size_of_hidden_layers, input_dim))

        Uz = self.Uz
        Ur = self.Ur
        Uh = self.Uh

        Wz = self.Wz
        Wr = self.Wr
        Wh = self.Wh

        Wo = self.Wo


        # Beginning of the forward propagation for a certain number of epochs

        for crawler in range(self.epochs):

            r = np.zeros((batch_size, timesteps, size_of_hidden_layers))
            sor = np.zeros((batch_size, timesteps, size_of_hidden_layers))
            z = np.zeros((batch_size, timesteps, size_of_hidden_layers))
            q = np.zeros((batch_size, timesteps, size_of_hidden_layers))
            ht = np.zeros((batch_size, timesteps, size_of_hidden_layers))
            h = np.zeros((batch_size, timesteps + 1, size_of_hidden_layers))
            output = np.zeros((batch_size, timesteps, input_dim))

            for t in range(timesteps):

                ## Applying the GRU equations

                r[:, t] = sigmoid(X[:, t].dot(Ur.T) + h[:, t - 1].dot(Wr))
                z[:, t] = sigmoid(X[:, t].dot(Uz.T) + h[:, t - 1].dot(Wz))
                sor[:, t] = h[:, t - 1] * r[:, t]
                q[:, t] = X[:, t].dot(Uh.T) + sor[:, t].dot(Wh)
                ht[:, t] = tanh(q[:, t])
                h[:, t] = (1 - z[:, t]) * ht[:, t] + z[:, t] * h[:, t - 1]

                output[:, t] = softmax(h[:, t].dot(Wo))

            # Initializing the gradients which will hold the errors during backpropagation
            gradient_Uz = np.zeros_like(Uz)
            gradient_Ur = np.zeros_like(Ur)
            gradient_Uh = np.zeros_like(Uh)

            gradient_Wz = np.zeros_like(Wz)
            gradient_Wr = np.zeros_like(Wr)
            gradient_Wh = np.zeros_like(Wh)

            gradient_Wo = np.zeros_like(Wo)

            dr = np.zeros_like(r)
            dsor = np.zeros_like(sor)
            dz = np.zeros_like(z)
            dht = np.zeros_like(ht)
            dh = np.zeros_like(h)

            ## Calculating the per-timestep loss and the Cross Entropy loss derivative
            lossgradients = []
            loss = []
            for y_i, o_i in zip(y_train, output):
                loss.append(abs(y_i - o_i))
                lossgradients.append(lossCE.derivative(y_i, o_i))

            lossgradients = array(lossgradients)
            loss = array(loss)

            dh[:, -1] = np.zeros((batch_size, size_of_hidden_layers))
            if not crawler % 100:
                print("LOSS:", np.mean(array(lossgradients)))

            # Backpropagation
            for t in reversed(range(timesteps)):

                delta = np.zeros((batch_size, size_of_hidden_layers))
                #dh[:,t]=(loss[:,t]).dot(Wz.T)

                # Instead of going all the way back to timestep 0
                # We truncate our backpropagation
                for ti in reversed(np.arange(max(0, t - self.truncation), t + 1)):

                    delta += (lossgradients[:, t] * softmax.prime(h[:, t].dot(Wo))).dot(
                        Wo.T) * (1 - z[:, t]) * tanh.prime(q[:, t])

                    dr[:, ti] = (Wr.dot(delta.T)).T * h[:, ti - 1]
                    dz[:, ti] = (Wz.dot(delta.T)).T * r[:, ti] * (h[:, ti - 2] - ht[:, ti - 1])

                    gradient_Wo += h[:, ti].T.dot(loss[:, ti])

                    gradient_Uz += (softmax.prime(z[:, ti]) * dz[:, ti]).T.dot(X[:, ti])
                    gradient_Ur += (softmax.prime(r[:, ti]) * dr[:, ti]).T.dot(X[:, ti])
                    gradient_Uh += (X[:, ti - 1].T.dot(delta)).T

                    gradient_Wz += (softmax.prime(z[:, ti]) * dz[:, ti]).T.dot(h[:, ti - 1])
                    gradient_Wr += (softmax.prime(r[:, ti]) * dr[:, ti]).T.dot(h[:, ti - 1])
                    gradient_Wh += delta.T.dot(h[:, t - 1] * r[:, t])

            # Updating our Weight matrices
            self.Uz = SGD.update(Uz, gradient_Uz)
            self.Ur = SGD.update(Ur, gradient_Ur)
            self.Uh = SGD.update(Uh, gradient_Uh)

            self.Wz = SGD.update(Wz, gradient_Wz)
            self.Wr = SGD.update(Wr, gradient_Wr)
            self.Wh = SGD.update(Wh, gradient_Wh)

            self.Wo = SGD.update(Wo, gradient_Wo)
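
SGD.update is called above with a weight matrix and its accumulated gradient, but the optimizer itself is not part of this excerpt. A minimal vanilla gradient-descent class it could correspond to; the learning rate is an assumption, not a value taken from the project:

class SGD:
    # Plain gradient descent; only sketches the update interface used above.
    def __init__(self, learning_rate=0.01):
        self.learning_rate = learning_rate  # assumed hyperparameter

    def update(self, weights, gradient):
        # Move the weights against the gradient and return the updated matrix.
        return weights - self.learning_rate * gradient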
Example #7
	def forward_and_backwardpropagation(self,X_train,y_train):
		batch_size, timesteps, input_dim = X_train.shape
		size_of_hidden_layers=self.size_of_hidden_layers
		X=X_train

		## Initializing the weight matrices according to some external material

		self.Uc=np.random.uniform(-1./np.sqrt(input_dim),1./np.sqrt(input_dim),((size_of_hidden_layers, input_dim)))
		self.Ui=np.random.uniform(-1./np.sqrt(input_dim),1./np.sqrt(input_dim),((size_of_hidden_layers, input_dim)))
		self.Uf=np.random.uniform(-1./np.sqrt(input_dim),1./np.sqrt(input_dim),((size_of_hidden_layers, input_dim)))
		self.Uo=np.random.uniform(-1./np.sqrt(input_dim),1./np.sqrt(input_dim),((size_of_hidden_layers, input_dim)))

		self.Wc=np.random.uniform(-1./np.sqrt(size_of_hidden_layers),1./np.sqrt(size_of_hidden_layers),(size_of_hidden_layers, size_of_hidden_layers))
		self.Wi=np.random.uniform(-1./np.sqrt(size_of_hidden_layers),1./np.sqrt(size_of_hidden_layers),(size_of_hidden_layers, size_of_hidden_layers))
		self.Wf=np.random.uniform(-1./np.sqrt(size_of_hidden_layers),1./np.sqrt(size_of_hidden_layers),(size_of_hidden_layers, size_of_hidden_layers))
		self.Wo=np.random.uniform(-1./np.sqrt(size_of_hidden_layers),1./np.sqrt(size_of_hidden_layers),(size_of_hidden_layers, size_of_hidden_layers))

		self.Wz=np.random.uniform(-1./np.sqrt(size_of_hidden_layers),1./np.sqrt(size_of_hidden_layers),(size_of_hidden_layers, input_dim))

		Uc=self.Uc
		Ui=self.Ui
		Uf=self.Uf
		Uo=self.Uo

		Wc=self.Wc
		Wi=self.Wi
		Wf=self.Wf
		Wo=self.Wo

		Wz=self.Wz

		# Beginning of the forward propagation for a certain number of epochs

		for crawler in range(self.epochs):

			f=np.zeros((batch_size, timesteps, size_of_hidden_layers))
			i=np.zeros((batch_size, timesteps, size_of_hidden_layers))
			o=np.zeros((batch_size, timesteps, size_of_hidden_layers))
			c=np.zeros((batch_size, timesteps, size_of_hidden_layers))
			g=np.zeros((batch_size, timesteps, size_of_hidden_layers))
			h=np.zeros((batch_size, timesteps+1, size_of_hidden_layers))

			output=np.zeros((batch_size, timesteps, input_dim))

			h[:,-1] = np.zeros((batch_size, size_of_hidden_layers))

			for t in range(timesteps):

				## Applying the LSTM equations

				f[:,t]=sigmoid(X[:,t].dot(Uf.T)+h[:,t-1].dot(Wf))
				i[:,t]=sigmoid(X[:,t].dot(Ui.T)+h[:,t-1].dot(Wi))
				o[:,t]=tanh(X[:,t].dot(Uo.T)+h[:,t-1].dot(Wo))
				g[:,t]=sigmoid(X[:,t].dot(Uc.T)+h[:,t-1].dot(Wc))
				c[:,t]=f[:,t]*c[:,t-1]+i[:,t]*g[:,t]

				h[:,t]=o[:,t]*tanh(c[:,t])

				output[:,t]=softmax(h[:,t].dot(Wz))

			# Initializing the gradients which will hold the errors during backpropagation
			gradient_Uc = np.zeros_like(Uc)
			gradient_Ui = np.zeros_like(Ui)
			gradient_Uf = np.zeros_like(Uf)
			gradient_Uo = np.zeros_like(Uo)

			gradient_Wc = np.zeros_like(Wc)
			gradient_Wi = np.zeros_like(Wi)
			gradient_Wf = np.zeros_like(Wf)
			gradient_Wo = np.zeros_like(Wo)

			gradient_Wz = np.zeros_like(Wz)

			df=np.zeros_like(f)
			di=np.zeros_like(i)
			do=np.zeros_like(o)
			dg=np.zeros_like(g)
			dc=np.zeros_like(c)

			dh=np.zeros_like(h)


			## Calculating the Cross Entropy loss and its derivative
			lossgradients=[]
			loss=[]
			for y_i,o_i in zip(y_train,output):
				loss.append(abs(y_i-o_i))
				lossgradients.append(lossCE.derivative(y_i,o_i))

			lossgradients=array(lossgradients)
			loss=array(loss)

			dh[:,-1]=np.zeros((batch_size, size_of_hidden_layers))
			if not crawler%100:
				print("LOSS:", np.mean(array(lossgradients)))


			# Backpropagation
			for t in reversed(range(timesteps)):

				dh[:,t]=dh[:,t-1]+(loss[:,t]).dot(Wz.T)
				#dh[:,t]=(loss[:,t]).dot(Wz.T)

				# Instead of going all the way back to timestep 0
				# we truncate our backpropagation
				for ti in reversed(np.arange(max(0, t - self.truncation), t+1)):

					do[:,ti]=tanh(c[:,ti])*dh[:,ti]
					dc[:,ti]=tanh.prime(c[:,ti])*o[:,ti]*dh[:,ti]
					df[:,ti]=c[:,ti-1]*dc[:,ti]
					dc[:,ti-1]+=f[:,ti-1]*dc[:,ti]
					di[:,ti]=g[:,ti]*dc[:,ti]
					dg[:,ti]=i[:,ti]*dc[:,ti]

					#gradient_Wz+=h[:,ti].T.dot(lossgradients[:,ti])
					gradient_Wz+=h[:,ti].T.dot(loss[:,ti])

					gradient_Uo+=(tanh.prime(o[:,ti])*do[:,ti]).T.dot(X[:,ti])
					gradient_Ui+=(softmax.prime(i[:,ti])*di[:,ti]).T.dot(X[:,ti])
					gradient_Uf+=(softmax.prime(f[:,ti])*df[:,ti]).T.dot(X[:,ti])
					gradient_Uc+=(softmax.prime(g[:,ti])*dg[:,ti]).T.dot(X[:,ti])

					# Recurrent-weight gradients use the previous hidden state h[:,ti-1]
					gradient_Wo+=(tanh.prime(o[:,ti])*do[:,ti]).T.dot(h[:,ti-1])
					gradient_Wi+=(softmax.prime(i[:,ti])*di[:,ti]).T.dot(h[:,ti-1])
					gradient_Wf+=(softmax.prime(f[:,ti])*df[:,ti]).T.dot(h[:,ti-1])
					gradient_Wc+=(softmax.prime(g[:,ti])*dg[:,ti]).T.dot(h[:,ti-1])
			

			# Updating our Weight matrices
			self.Ui=SGD.update(Ui,gradient_Ui)
			self.Uf=SGD.update(Uf,gradient_Uf)
			self.Uo=SGD.update(Uo,gradient_Uo)
			self.Uc=SGD.update(Uc,gradient_Uc)

			self.Wi=SGD.update(Wi,gradient_Wi)
			self.Wf=SGD.update(Wf,gradient_Wf)
			self.Wo=SGD.update(Wo,gradient_Wo)
			self.Wc=SGD.update(Wc,gradient_Wc)

			self.Wz=SGD.update(Wz,gradient_Wz)
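
The sigmoid, tanh (a tanh_func instance with a .prime method) and softmax (with .prime) helpers used throughout this excerpt are defined elsewhere in the project. Plausible minimal versions, written only to make the calls above concrete and not taken from the project's source:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

class tanh_func:
    # Callable tanh with a derivative, matching the tanh(...) / tanh.prime(...) calls.
    def __call__(self, x):
        return np.tanh(x)

    def prime(self, x):
        return 1.0 - np.tanh(x) ** 2

class softmax:
    # Row-wise softmax; prime returns only the diagonal of the Jacobian (a simplification).
    def __call__(self, x):
        e = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return e / np.sum(e, axis=-1, keepdims=True)

    def prime(self, x):
        p = self.__call__(x)
        return p * (1.0 - p)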