def __init__(self, M1, M2, an_id):
		self.id = an_id 
		self.M1 = M1 
		self.M2 = M2
		W, b = init_weight_and_bias(M1, M2)
		self.W = theano.shared(W, 'W_%s' % an_id)
		self.b = theano.shared(b, 'b_%s' % an_id)
		self.params = [self.W, self.b]
    def __init__(self, M1, M2, an_id):
        self.id = an_id
        self.M1 = M1
        self.M2 = M2
        W, b = init_weight_and_bias(M1, M2)
        self.W = tf.Variable(W.astype(np.float32), name='W%d' % an_id)
        self.b = tf.Variable(b.astype(np.float32), name='b%d' % an_id)
        self.params = [self.W, self.b]
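# Both HiddenLayer constructors above depend on an init_weight_and_bias helper
# that is not part of this listing. The definition below is a hypothetical
# sketch (it assumes numpy is imported as np, as elsewhere in this listing);
# the fan-in-scaled Gaussian initialization is an assumption, not taken from
# the source.
def init_weight_and_bias(M1, M2):
    W = np.random.randn(M1, M2) / np.sqrt(M1)  # assumed: fan-in-scaled Gaussian weights
    b = np.zeros(M2)                           # assumed: zero biases
    return W.astype(np.float32), b.astype(np.float32)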
	def fit(self, X, Y, learning_rate=10e-5, epochs=200, reg=10e-8, batch_sz=200, show_fig=False, activation=tf.tanh):
		X, Y = shuffle(X, Y)
		K = len(np.unique(Y))  

		T = y2indicator(Y, K).astype(np.float32)
		Xvalid, Yvalid, Tvalid = X[-1000:, :], Y[-1000:], T[-1000:, :]
		Xtrain, Ytrain, Ttrain = X[:-1000, :], Y[:-1000], T[:-1000, :]

		N, D = Xtrain.shape
		

		# variable initialization (one hidden layer of size self.M)
		W1, b1 = init_weight_and_bias(D, self.M)
		W2, b2 = init_weight_and_bias(self.M, K)



		self.W1 = tf.Variable(W1.astype(np.float32), name='W1')
		self.b1 = tf.Variable(b1.astype(np.float32), name='b1')
		self.W2 = tf.Variable(W2.astype(np.float32), name='W2')
		self.b2 = tf.Variable(b2.astype(np.float32), name='b2')
		self.params = [self.W1, self.b1, self.W2, self.b2]

		# Define placeholders
		X = tf.placeholder(tf.float32, shape=(None, D), name='X')
		T = tf.placeholder(tf.float32, shape=(None, K), name='Y')

		# forward pass: one hidden layer, then linear output (logits)
		Z = activation(tf.matmul(X, self.W1) + self.b1)
		Yish = tf.matmul(Z, self.W2) + self.b2

		rcost = reg * tf.reduce_sum([tf.nn.l2_loss(p) for p in self.params])
		cost = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(labels=T, logits=Yish)) + rcost

		train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
		self.predict_op = tf.argmax(Yish, 1)

		n_batches = N // batch_sz
		costs = []
		errors = []
		init = tf.global_variables_initializer()
		with tf.Session() as session:
			session.run(init)

			for i in range(epochs):
				# shuffle all three arrays together so X, Y, and the indicator matrix T stay aligned
				Xtrain, Ytrain, Ttrain = shuffle(Xtrain, Ytrain, Ttrain)
				for j in range(n_batches):
					Xbatch = Xtrain[j*batch_sz:(j+1)*batch_sz,:]
					Ybatch = Ytrain[j*batch_sz:(j+1)*batch_sz]
					Tbatch = Ttrain[j*batch_sz:(j+1)*batch_sz,:]

					session.run(train_op,
						feed_dict={
							X: Xbatch,
							T: Tbatch 
					})

					if j % 10 == 0: 
						c = session.run(cost, feed_dict={X: Xvalid, T: Tvalid})
						pYvalid = session.run(self.predict_op, feed_dict={X: Xvalid})
						err = error_rate(Yvalid, pYvalid)
						print("i:%d\tj:%d\tc:%.3f\terr:%.3f" % (i, j, c, err))
						costs.append(c)
						errors.append(err)

		if show_fig:
			plt.title('costs')
			plt.plot(costs)
			plt.show()

			plt.title('error rate')
			plt.plot(errors)
			plt.show()
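	# The fit() above reads a single hidden-layer size from self.M, so the enclosing
	# class presumably has a constructor along these lines. The class name "ANN" and
	# this constructor are assumptions, not shown in the source:
	#
	# class ANN(object):
	#     def __init__(self, M):
	#         self.M = M  # number of hidden units in the single hidden layer
	#
	# hypothetical usage: model = ANN(200); model.fit(Xtrain, Ytrain, show_fig=True)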
	def fit(self, X, Y, learning_rate=10e-4, reg=10e-8, epochs=10000, show_figure=False):
		Nvalid = 1000
		N, D = X.shape
		K = len(np.unique(Y))
		X, Y = shuffle(X, Y)

		Xvalid, Yvalid = X[-Nvalid:, :], Y[-Nvalid:]
		X, Y = X[:-Nvalid, :], Y[:-Nvalid]


		# initialize hidden layers
		self.hidden_layers = []
		M1 = D
		for count, M2 in enumerate(self.hidden_layer_sizes):
			hidden_layer = HiddenLayer(M1, M2, count)
			self.hidden_layers.append(hidden_layer)
			M1 = M2

		#final layer
		W, b = init_weight_and_bias(M1, K)  
		self.W = theano.shared(W, 'W_logreg')
		self.b = theano.shared(b, 'b_logreg')

		#collect parameters for later use
		self.params = []
		for h in self.hidden_layers: 
			self.params += h.params
		self.params += [self.W, self.b]
		
		
		# Theano variables
		thX = T.fmatrix('X')
		thY = T.ivector('Y')
		pY = self.th_forward(thX)

		cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY]))
		prediction = self.th_predict(thX)


		# actual prediction functions and variables
		self.predict_op = theano.function(inputs=[thX], outputs=prediction)
		cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction])

		# gradient descent updates with L2 weight decay
		updates = [
			(p, p - learning_rate*(T.grad(cost, p) + reg*p)) for p in self.params
		]
		
		train_op = theano.function(
			inputs=[thX, thY],
			updates=updates,
			allow_input_downcast=True,
		)

		batch_sz = 200
		n_batches = X.shape[0] // batch_sz  # X no longer includes the validation rows
		costs = []
		for i in range(epochs):
			X, Y = shuffle(X, Y)
			for j in range(n_batches):
				Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz),:]
				Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

				train_op(Xbatch.astype(np.float32), Ybatch.astype(np.int32))
				
				if j % 100 == 0:
					c, p = cost_predict_op(Xvalid.astype(np.float32), Yvalid.astype(np.int32))
					costs.append(c)
					err = error_rate(Yvalid, p)

					print("i:%d\tj:%d\tnb:%d\tc:%.3f\terr:%.3f" % (i, j, n_batches, c, err))
			print("i:%d\tj:%d\tnb:%d\tc:%.3f\terr:%.3f" % (i, batch_sz, n_batches, c, err))

		print("Final error rate:", err)
			
		if show_figure:
			plt.plot(costs)
			plt.show() 
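	# th_forward() and th_predict() are used by fit() above but are not part of
	# this listing. The definitions below are a hypothetical sketch consistent
	# with the softmax/cross-entropy cost used there; HiddenLayer.forward and the
	# choice of nonlinearity are assumptions, not taken from the source.
	def th_forward(self, X):
		Z = X
		for h in self.hidden_layers:
			Z = h.forward(Z)  # assumed: each hidden layer applies its W, b and a nonlinearity
		return T.nnet.softmax(Z.dot(self.W) + self.b)

	def th_predict(self, X):
		pY = self.th_forward(X)
		return T.argmax(pY, axis=1)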
    def fit(self,
            X,
            Y,
            learning_rate=10e-7,
            mu=0.99,
            decay=0.999,
            reg=10e-12,
            eps=10e-10,
            epochs=400,
            batch_sz=100,
            show_figure=False):
        # cast hyperparameters to float32 so Theano keeps everything in single precision
        learning_rate = np.float32(learning_rate)
        mu = np.float32(mu)
        decay = np.float32(decay)
        reg = np.float32(reg)
        eps = np.float32(eps)

        Nvalid = 1000
        N, D = X.shape
        K = len(np.unique(Y))
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = Y.astype(np.float32)

        Xvalid, Yvalid = X[-Nvalid:, :], Y[-Nvalid:, ]
        X, Y = X[:-Nvalid, :], Y[:-Nvalid, ]

        #Initialize Hidden layers
        self.hidden_layers = []
        M1 = D
        for count, M2 in enumerate(self.hidden_layer_sizes):
            hidden_layer = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(hidden_layer)
            M1 = M2

        #final layer
        W, b = init_weight_and_bias(M1, K)
        self.W = theano.shared(W, 'W_logreg')
        self.b = theano.shared(b, 'b_logreg')

        #collect parameters for later use
        self.params = []
        for h in self.hidden_layers:
            self.params += h.params
        self.params += [self.W, self.b]

        # for momentum
        dparams = [
            theano.shared(np.zeros(p.get_value().shape, dtype=np.float32))
            for p in self.params
        ]

        #for rmsprop
        cache = [
            theano.shared(np.zeros(p.get_value().shape, dtype=np.float32))
            for p in self.params
        ]

        #Theano variables
        thX = T.fmatrix('X')
        thY = T.ivector('Y')
        pY = self.th_forward(thX)

        rcost = reg * T.sum([(p * p).sum() for p in self.params])
        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
        prediction = self.th_predict(thX)

        # actual prediction functions and variables
        self.predict_op = theano.function(inputs=[thX], outputs=prediction)
        cost_predict_op = theano.function(inputs=[thX, thY],
                                          outputs=[cost, prediction])

        # RMSProp cache updates, then parameter and momentum (velocity) updates
        grads = T.grad(cost, self.params)
        updates = [
            (c, decay * c + (np.float32(1) - decay) * g * g)
            for g, c in zip(grads, cache)
        ] + [
            (p, p + mu * dp - learning_rate * g / T.sqrt(c + eps))
            for p, c, dp, g in zip(self.params, cache, dparams, grads)
        ] + [
            (dp, mu * dp - learning_rate * g / T.sqrt(c + eps))
            for c, dp, g in zip(cache, dparams, grads)
        ]
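        # The `updates` list above implements RMSProp with momentum. Per parameter p
        # with gradient g, one training step amounts to (all right-hand sides are
        # evaluated with the pre-step values, since Theano applies shared-variable
        # updates simultaneously):
        #
        #   cache <- decay * cache + (1 - decay) * g**2
        #   dp    <- mu * dp - learning_rate * g / sqrt(cache + eps)
        #   p     <- p + mu * dp - learning_rate * g / sqrt(cache + eps)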

        train_op = theano.function(inputs=[thX, thY], updates=updates)

        n_batches = X.shape[0] // batch_sz  # X no longer includes the validation rows
        costs = []
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz), :]
                Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]

                train_op(Xbatch.astype(np.float32), Ybatch.astype(np.int32))

                if j % 100 == 0:
                    c, p = cost_predict_op(Xvalid.astype(np.float32),
                                           Yvalid.astype(np.int32))
                    costs.append(c)
                    err = error_rate(Yvalid, p)

                    print("i:%d\tj:%d\tnb:%d\tc:%.3f\terr:%.3f" % (
                        i, j, n_batches, c, err))
            print("i:%d\tj:%d\tnb:%d\tc:%.3f\terr:%.3f" % (
                i, batch_sz, n_batches, c, err))

        print("Final error rate:", err)

        if show_figure:
            plt.plot(costs)
            plt.show()
    def fit(self,
            X,
            Y,
            learning_rate=10e-8,
            mu=0.99,
            decay=0.99,
            reg=10e-8,
            epochs=400,
            batch_sz=100,
            show_figure=False):
        X, Y = shuffle(X, Y)
        K = len(np.unique(Y))
        Y = y2indicator(Y, K).astype(np.float32)

        Xvalid, Yvalid = X[-1000:, :], Y[-1000:]
        Yvalid_flat = np.argmax(Yvalid, axis=1)
        Xtrain, Ytrain = X[:-1000, :], Y[:-1000]

        N, D = X.shape

        #Build hidden layers
        M1 = D
        self.hidden_layers = []
        self.params = []
        for an_id, M2 in enumerate(self.hidden_layer_sizes):
            h = HiddenLayer(M1, M2, an_id)
            self.hidden_layers.append(h)
            self.params += h.params
            M1 = M2

        M2 = K
        an_id = len(self.hidden_layer_sizes)
        W, b = init_weight_and_bias(M1, M2)
        self.W = tf.Variable(W.astype(np.float32), name='W%d' % an_id)
        self.b = tf.Variable(b.astype(np.float32), name='b%d' % an_id)

        self.params += [self.W, self.b]

        X = tf.placeholder(tf.float32, shape=(None, D), name='X')
        Y = tf.placeholder(tf.float32, shape=(None, K), name='Y')
        Yish = self.forward(X)

        # cost function: L2 regularization plus softmax cross-entropy on the logits
        rcost = reg * tf.reduce_sum([tf.nn.l2_loss(p) for p in self.params])
        cost = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits(labels=Y,
                                                    logits=Yish)) + rcost

        train_op = tf.train.RMSPropOptimizer(learning_rate,
                                             decay=decay,
                                             momentum=mu).minimize(cost)
        predict_op = tf.argmax(Yish, 1)

        LL = []
        n_batches = Xtrain.shape[0] // batch_sz
        best_validation_error = 1
        init = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init)

            for i in range(epochs):
                Xtrain, Ytrain = shuffle(Xtrain, Ytrain)

                for j in range(n_batches):
                    Xbatch = Xtrain[j * batch_sz:(j + 1) * batch_sz, :]
                    Ybatch = Ytrain[j * batch_sz:(j + 1) * batch_sz, :]

                    session.run(train_op, feed_dict={X: Xbatch, Y: Ybatch})

                    if j % 100 == 0:
                        pY = session.run(predict_op, feed_dict={X: Xvalid})
                        c = session.run(cost, feed_dict={X: Xvalid, Y: Yvalid})
                        err = error_rate(Yvalid_flat, pY)
                        LL.append(c)
                        print("i:%d\tj:%d\tnb:%d\tc:%.3f\te:%.3f" % (
                            i, j, n_batches, c, err))

                        if err < best_validation_error:
                            best_validation_error = err
            print("best_validation_error:", best_validation_error)

        if show_figure:
            plt.plot(LL)
            plt.show()
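    # fit() above calls self.forward(X), which is not included in this listing.
    # The definition below is a hypothetical sketch consistent with the TensorFlow
    # HiddenLayer constructor shown earlier; the ReLU activation is an assumption,
    # not taken from the source.
    def forward(self, X):
        Z = X
        for h in self.hidden_layers:
            Z = tf.nn.relu(tf.matmul(Z, h.W) + h.b)  # assumed activation per hidden layer
        return tf.matmul(Z, self.W) + self.b  # logits; softmax is applied inside the cost

    # hypothetical usage (the class name "ANN" and its constructor are assumptions):
    # model = ANN(hidden_layer_sizes=[2000, 1000, 500])
    # model.fit(X, Y, show_figure=True)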