def fit(self,
            X,
            Y,
            learning_rate=10e-6,
            regularisation=10e-1,
            epochs=10000,
            show_fig=False):
        X, Y = shuffle(X, Y)

        # print("X.shape"+str(X.shape))
        # print("Y.shape"+str(Y.shape))
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        # Tvalid = y2indicator(Yvalid)  # Tvalid is not needed because cost2 works on the integer labels directly
        X, Y = X[:-1000], Y[:-1000]
        # print("X.shape"+str(X.shape))
        # print("Y.shape"+str(Y.shape))
        N, D = X.shape
        K = len(set(Y))
        T = y2indicator(Y)  #Need this for gradient descent

        self.W1, self.b1 = init_weight_and_bias(D, self.M)
        self.W2, self.b2 = init_weight_and_bias(self.M, K)

        costs = []
        best_validation_error = 1
        for i in range(epochs):
            # forward propagation
            pY, Z = self.forward(X)

            # gradient descent
            pY_T = pY - T
            self.W2 -= learning_rate * (Z.T.dot(pY_T) +
                                        regularisation * self.W2)
            self.b2 -= learning_rate * (
                (pY_T).sum(axis=0) + regularisation * self.b2)

            # dZ = pY_T.dot(self.W2.T) * (Z>0) #Relu
            dZ = pY_T.dot(self.W2.T) * (1 - Z * Z)  # Tanh
            self.W1 -= learning_rate * (X.T.dot(dZ) + regularisation * self.W1)
            self.b1 -= learning_rate * (dZ.sum(axis=0) +
                                        regularisation * self.b1)

            if i % 10 == 0:
                pYvalid, _ = self.forward(Xvalid)
                c = cost2(Yvalid, pYvalid)
                costs.append(c)
                e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
                print("i : " + str(i) + "; Cost : " + str(c) + "; Error : " +
                      str(e))
                if e < best_validation_error:
                    best_validation_error = e

        print("Best Validation error : " + str(best_validation_error))

        if (show_fig):
            plt.plot(costs)
            plt.show()
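
Example #1 above (and most of the examples that follow) leans on a small set of helper functions — init_weight_and_bias, y2indicator, error_rate and cost2 — imported from a utility module that is not shown. A minimal sketch of what those helpers typically look like (the exact weight-initialisation scaling and whether cost2 averages or sums are assumptions):

import numpy as np

def init_weight_and_bias(M1, M2):
    # small random weights scaled by layer size, zero biases (assumed scaling)
    W = np.random.randn(M1, M2) / np.sqrt(M1 + M2)
    b = np.zeros(M2)
    return W.astype(np.float32), b.astype(np.float32)

def y2indicator(y):
    # integer labels of shape (N,) -> one-hot indicator matrix of shape (N, K)
    N = len(y)
    K = len(set(y))
    ind = np.zeros((N, K))
    ind[np.arange(N), y] = 1
    return ind

def error_rate(targets, predictions):
    # fraction of misclassified samples
    return np.mean(targets != predictions)

def cost2(T, Y):
    # multiclass cross-entropy, with integer targets T indexing the predicted probabilities Y
    N = len(T)
    return -np.log(Y[np.arange(N), T]).mean()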
Example #2
    def fit(self,X,Y,learning_rate=5e-7,regularisation=1.0,epochs=10000,show_fig=False):
        X,Y = shuffle(X,Y)
        Y = np.reshape(Y,(len(Y),1)) #s
        # print("X.shape"+str(X.shape))
        # print("Y.shape"+str(Y.shape))
        Xvalid, Yvalid = X[-1000:],Y[-1000:]
        X,Y = X[:-1000],Y[:-1000]
        # print("X.shape"+str(X.shape))
        # print("Y.shape"+str(Y.shape))
        N,D = X.shape
        self.W1,self.b1 = init_weight_and_bias(D,self.M) #s
        self.W2,self.b2 = init_weight_and_bias(self.M,1) #s
        # self.W1 = np.random.randn(D, self.M) / np.sqrt(D) #lp
        # self.b1 = np.zeros(self.M) #lp
        # self.W2 = np.random.randn(self.M) / np.sqrt(self.M) #lp
        # self.b2 = 0 #lp

        costs = []
        best_validation_error = 1
        for i in range(epochs):
            # forward propagation
            pY, Z = self.forward(X)

            # gradient descent
            pY_Y = pY - Y
            # print("X.shape"+str(X.shape))
            # print("pY.shape"+str(pY.shape))
            # print("Y.shape"+str(Y.shape))
            # print("Z.shape"+str(Z.shape))
            # print("W2.shape"+str(self.W2.shape))
            # print("pY_Y.shape"+str(pY_Y.shape))
            self.W2 -= learning_rate*(Z.T.dot(pY_Y) + regularisation*self.W2)
            self.b2 -= learning_rate*(pY_Y.sum() + regularisation*self.b2)
            # dZ = pY_Y.dot(self.W2.T) * (Z > 0)  # ReLU
            dZ = pY_Y.dot(self.W2.T) * (1 - Z*Z)  # Tanh
            # dZ = np.outer(pY_Y, self.W2) * (Z > 0) #lp

            self.W1 -= learning_rate*(X.T.dot(dZ) + regularisation*self.W1)
            self.b1 -= learning_rate*(np.sum(dZ,axis=0) + regularisation*self.b1)

            if i%20 ==0 :
                pYvalid ,_ = self.forward(Xvalid)
                # print("Yvalid.shape"+str(Yvalid.shape))
                # print("pYvalid.shape"+str(pYvalid.shape))
                c = sigmoid_cost(Yvalid,pYvalid)
                costs.append(c)
                e = error_rate(Yvalid, np.round(pYvalid))
                print("i : "+str(i)+"; Cost : "+str(c)+"; Error : "+str(e))
                if e < best_validation_error:
                    best_validation_error = e

        print("Best Validation error : "+str(best_validation_error))

        if(show_fig):
            plt.plot(costs)
            plt.show()
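
Example #2 is the binary (single sigmoid output) variant, so its validation cost uses sigmoid_cost rather than cost2. A plausible sketch of that helper, under the same assumptions as the utilities above:

import numpy as np

def sigmoid_cost(T, Y):
    # binary cross-entropy between targets T in {0, 1} and predicted probabilities Y
    return -(T * np.log(Y) + (1 - T) * np.log(1 - Y)).sum()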
Example #3
 def __init__(self, M1, M2):
     self.M1 = M1
     self.M2 = M2
     W, b = init_weight_and_bias(M1, M2)
     self.W = tf.Variable(W.astype(np.float32))
     self.b = tf.Variable(b.astype(np.float32))
     self.params = [self.W, self.b]
Example #4
 def __init__(self, M1, M2, an_id):
     self.id = an_id
     self.M1 = M1
     self.M2 = M2
     W, b = init_weight_and_bias(M1, M2)
     self.W = tf.Variable(W.astype(np.float32), name='W%s' % self.id)
     self.b = tf.Variable(b.astype(np.float32), name='b%s' % self.id)
     self.parameters = [self.W, self.b]
 def __init__(self, M1, M2, an_id):
     self.id = an_id
     self.M1 = M1
     self.M2 = M2
     W, b = init_weight_and_bias(M1, M2)
     self.W = tf.Variable(W.astype(np.float32))
     self.b = tf.Variable(b.astype(np.float32))
     self.params = [self.W, self.b]
Example #6
 def __init__(self, M1, M2, an_id):
     self.id = an_id
     self.M1 = M1
     self.M2 = M2
     W, b = init_weight_and_bias(M1, M2)
     self.W = tf.Variable(W)
     self.b = tf.Variable(b)
     self.params = [self.W, self.b]
Example #7
 def __init__(self, M1, M2, an_id):
     self.id = an_id
     self.M1 = M1
     self.M2 = M2
     W0, b0 = init_weight_and_bias(M1, M2)
     self.W = tf.Variable(W0, name='W%s' % self.id)
     self.b = tf.Variable(b0, name='b%s' % self.id)
     self.params = [self.W, self.b]
Example #8
 def __init__(self, M1, M2, an_id):
     self.id = an_id
     self.M1 = M1
     self.M2 = M2
     W, b = init_weight_and_bias(M1, M2)
     self.W = theano.shared(W, 'W_%s' % self.id)
     self.b = theano.shared(b, 'b_%s' % self.id)
     self.params = [self.W, self.b]
Example #9
 def __init__(self, M1, M2, activation):
     self.activation = activation
     self.M1 = M1
     self.M2 = M2
     W, b = init_weight_and_bias(M1, M2)
     self.W = tf.Variable(W.astype(np.float32))
     self.b = tf.Variable(b.astype(np.float32))
     self.parameters = [self.W, self.b]
 def __init__(self, M1, M2, an_id):
     self.id = an_id
     self.M1 = M1
     self.M2 = M2
     W, b = init_weight_and_bias(M1, M2)
     self.W = theano.shared(W, 'W_%s' % self.id)
     self.b = theano.shared(b, 'b_%s' % self.id)
     self.params = [self.W, self.b]
 def __init__(self, M1, M2, an_id, f):
     self.id = an_id
     self.f = f  # activation function
     self.M1 = M1
     self.M2 = M2
     W, b = init_weight_and_bias(M1, M2)
     self.W = theano.shared(W, 'W%s' % self.id)
     self.b = theano.shared(b, 'b%s' % self.id)
     self.params = [self.W, self.b]
def neural_network(D, K, tfX):
    W1, b1 = init_weight_and_bias(D, n_nodes_h1)
    hidden_1_layer = {
        'w': tf.Variable(W1.astype(np.float32)),
        'b': tf.Variable(b1.astype(np.float32))
    }
    W2, b2 = init_weight_and_bias(n_nodes_h1, n_nodes_h2)
    hidden_2_layer = {
        'w': tf.Variable(W2.astype(np.float32)),
        'b': tf.Variable(b2.astype(np.float32))
    }
    W3, b3 = init_weight_and_bias(n_nodes_h2, n_nodes_h3)
    hidden_3_layer = {
        'w': tf.Variable(W3.astype(np.float32)),
        'b': tf.Variable(b3.astype(np.float32))
    }
    W4, b4 = init_weight_and_bias(n_nodes_h3, K)
    output_layer = {
        'w': tf.Variable(W4.astype(np.float32)),
        'b': tf.Variable(b4.astype(np.float32))
    }

    #forwarding
    l1 = tf.add(tf.matmul(tfX, hidden_1_layer['w']), hidden_1_layer['b'])
    l1 = tf.nn.relu(l1)

    l2 = tf.add(tf.matmul(l1, hidden_2_layer['w']), hidden_2_layer['b'])
    l2 = tf.nn.relu(l2)

    l3 = tf.add(tf.matmul(l2, hidden_3_layer['w']), hidden_3_layer['b'])
    l3 = tf.nn.relu(l3)

    output = tf.matmul(l3, output_layer['w']) + output_layer['b']

    params.extend([W1, b1, W2, b2, W3, b3, W4, b4])

    return output
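
neural_network only builds the graph and returns raw logits; it assumes module-level globals n_nodes_h1, n_nodes_h2, n_nodes_h3 and params. A hypothetical driver showing how it might be wired into a cost and training op (this scaffolding is an illustration, not part of the original example; D, K, Xtrain and Ttrain are assumed to exist):

tfX = tf.placeholder(tf.float32, shape=(None, D), name='X')
tfT = tf.placeholder(tf.float32, shape=(None, K), name='T')

logits = neural_network(D, K, tfX)
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tfT))
train_op = tf.train.AdamOptimizer(1e-3).minimize(cost)
predict_op = tf.argmax(logits, 1)

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    for i in range(100):
        session.run(train_op, feed_dict={tfX: Xtrain, tfT: Ttrain})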
Example #13
    def fit(self,X,Y,learning_rate=10e-8,regularisation=10e-12,epochs=10000,show_fig=False):
        X,Y = shuffle(X,Y)

        # print("X.shape"+str(X.shape))
        # print("Y.shape"+str(Y.shape))
        Xvalid, Yvalid = X[-1000:],Y[-1000:]
        Tvalid = y2indicator(Yvalid)
        X,Y = X[:-1000],Y[:-1000]
        # print("X.shape"+str(X.shape))
        # print("Y.shape"+str(Y.shape))
        N,D = X.shape
        K = len(set(Y))
        T = y2indicator(Y)


        self.W,self.b = init_weight_and_bias(D,K)


        costs = []
        best_validation_error = 1
        for i in range(epochs):
            # forward propagation
            pY = self.forward(X)

            # gradient descent
            self.W -= learning_rate*(X.T.dot(pY-T) + regularisation*self.W)
            self.b -= learning_rate*((pY-T).sum(axis=0) + regularisation*self.b)


            if i%10 ==0 :
                pYvalid = self.forward(Xvalid)
                c = cost(Tvalid,pYvalid)
                costs.append(c)
                e = error_rate(Yvalid, np.argmax(pYvalid,axis=1))
                print("i : "+str(i)+"; Cost : "+str(c)+"; Error : "+str(e))
                if e < best_validation_error:
                    best_validation_error = e

        print("Best Validation error : "+str(best_validation_error))

        if(show_fig):
            plt.plot(costs)
            plt.show()
    def fit(self, X, Y, lr=1e-3, mu=0.99, reg=1e-3, decay=0.99999, eps=1e-10, batch_sz=30, epochs=3, show_fig=True):
        lr = np.float32(lr)
        mu = np.float32(mu)
        reg = np.float32(reg)
        decay = np.float32(decay)
        eps = np.float32(eps)

        # make a validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = Y.astype(np.int32)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]

        # initialize convpool layers
        N, c, width, height = X.shape
        mi = c
        outw = width
        outh = height
        self.convpool_layers = []
        for mo, fw, fh in self.convpool_layer_sizes:
            layer = ConvPoolLayer(mi, mo, fw, fh)
            self.convpool_layers.append(layer)
            outw = (outw - fw + 1) // 2
            outh = (outh - fh + 1) // 2
            mi = mo

        # initialize mlp layers
        K = len(set(Y))
        self.hidden_layers = []
        M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1

        # logistic regression layer
        W, b = init_weight_and_bias(M1, K)
        self.W = theano.shared(W, 'W_logreg')
        self.b = theano.shared(b, 'b_logreg')

        # collect params for later use
        self.params = [self.W, self.b]
        for c in self.convpool_layers:
            self.params += c.params
        for h in self.hidden_layers:
            self.params += h.params

        # set up theano functions and variables
        thX = T.tensor4('X', dtype='float32')
        thY = T.ivector('Y')
        pY = self.forward(thX)

        rcost = reg*T.sum([(p*p).sum() for p in self.params])
        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
        prediction = self.th_predict(thX)

        cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction])

        updates = rmsprop(cost, self.params, lr, mu, decay, eps)
        train_op = theano.function(
            inputs=[thX, thY],
            outputs=cost,
            updates=updates
        )

        n_batches = N // batch_sz
        costs = []
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

                train_c = train_op(Xbatch, Ybatch)

                if j % 20 == 0:
                    c, p = cost_predict_op(Xvalid, Yvalid)
                    costs.append(c)
                    e = error_rate(Yvalid, p)
                    print(
                        "i:", i,
                        "j:", j,
                        "nb:", n_batches,
                        "train cost:", train_c,
                        "cost:", c,
                        "error rate:", e
                    )

        if show_fig:
            plt.plot(costs)
            plt.show()
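
The fit above calls rmsprop(cost, self.params, lr, mu, decay, eps) to build its Theano update list, but that helper is defined elsewhere. A sketch of an RMSprop-with-momentum update matching that signature (the cache initialisation value is an assumption):

import numpy as np
import theano
import theano.tensor as T

def rmsprop(cost, params, lr, mu, decay, eps):
    grads = T.grad(cost, params)
    updates = []
    for p, g in zip(params, grads):
        # one running gradient cache and one momentum term per parameter
        ones = np.ones_like(p.get_value(), dtype=np.float32)
        c = theano.shared(0.1 * ones)
        v = theano.shared(0.0 * ones)
        new_c = decay * c + (np.float32(1.0) - decay) * g * g
        new_v = mu * v - lr * g / T.sqrt(new_c + eps)
        updates += [(c, new_c), (v, new_v), (p, p + new_v)]
    return updates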
Example #15
    def fit(self,
            X,
            Y,
            lr=1e-6,
            mu=0.99,
            decay=0.999,
            reg=1e-11,
            eps=1e-9,
            epochs=300,
            batch_sz=100,
            show_fig=False):
        K = len(set(Y))

        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = y2indicator(Y).astype(np.int32)
        Xvalid = X[-1000:]  # last 1000
        Yvalid = Y[-1000:]  # last 1000
        Yvalid_flat = np.argmax(Yvalid, axis=1)
        X = X[:-1000]  # all but the last 1000
        Y = Y[:-1000]  # all but the last 1000

        N, D = X.shape
        self.hidden_layers = []
        M1 = D
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2)
            self.hidden_layers.append(h)
            M1 = M2
        W, b = init_weight_and_bias(M1, K)
        self.W = tf.Variable(W.astype(np.float32))
        self.b = tf.Variable(b.astype(np.float32))

        self.params = [self.W, self.b]
        for h in self.hidden_layers:
            self.params += h.params

        tfX = tf.placeholder(tf.float32, shape=(None, D), name='X')
        tfT = tf.placeholder(tf.float32, shape=(None, K), name='T')
        act = self.forward(tfX)

        rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params])
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
            logits=act, labels=tfT)) + rcost
        prediction = self.predict(tfX)

        train_op = tf.train.RMSPropOptimizer(lr, decay=decay,
                                             momentum=mu).minimize(cost)

        n_batches = int(N / batch_sz)
        costs = []
        init = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init)
            for i in range(epochs):
                X, Y = shuffle(X, Y)
                for j in range(n_batches):
                    Xbatch = X[j * batch_sz:(j + 1) * batch_sz]
                    Ybatch = Y[j * batch_sz:(j + 1) * batch_sz]

                    session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch})

                    if j % 20 == 0:
                        c = session.run(cost,
                                        feed_dict={
                                            tfX: Xvalid,
                                            tfT: Yvalid
                                        })
                        costs.append(c)
                        p = session.run(prediction,
                                        feed_dict={
                                            tfX: Xvalid,
                                            tfT: Yvalid
                                        })
                        e = error_rate(Yvalid_flat, p)
                        print("i:", i, "j:", j, "nb:", n_batches, "cost:", c,
                              "error rate:", e)

        if show_fig:
            plt.plot(costs)
            plt.show()
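
Example #15's fit relies on self.forward and self.predict, which the ANN class defines elsewhere. Given the HiddenLayer __init__ shown above, a plausible sketch (the ReLU activation is an assumption):

    def forward(self, X):
        # pass through every hidden layer and return raw logits for the softmax cost
        Z = X
        for h in self.hidden_layers:
            Z = tf.nn.relu(tf.matmul(Z, h.W) + h.b)
        return tf.matmul(Z, self.W) + self.b

    def predict(self, X):
        act = self.forward(X)
        return tf.argmax(act, 1)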
Example #16
    def fit(self,
            X,
            Y,
            lr=10e-5,
            mu=0.99,
            reg=10e-7,
            decay=0.99999,
            eps=10e-3,
            batch_sz=30,
            epochs=100,
            show_fig=True):
        lr = np.float32(lr)
        mu = np.float32(mu)
        reg = np.float32(reg)
        decay = np.float32(decay)
        eps = np.float32(eps)

        # ============= Prep Data =============
        # Validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = Y.astype(np.int32)
        # Valid set - last 1000 entries
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        # Training set - Everything except last 1000 entries
        X, Y = X[:-1000], Y[:-1000]

        # ============= Prep ConvPool layers =============
        # initialize convpool layers
        N, c, width, height = X.shape
        mi = c
        outw = width
        outh = height
        self.convpool_layers = []
        # For each parameterised convpool layer
        conv_layer_count = 0
        for mo, fw, fh in self.convpool_layer_sizes:
            layer = ConvPoolLayer(mi, mo, fw, fh,
                                  self.pool_sz[conv_layer_count])
            # Add layer
            self.convpool_layers.append(layer)
            # output width and height after this conv + pool layer
            outw = (outw - fw + 1) // self.pool_sz[conv_layer_count][0]
            outh = (outh - fh + 1) // self.pool_sz[conv_layer_count][1]
            # Set feature input to previous feature output
            # for the next loop
            mi = mo
            conv_layer_count += 1
        # ============= Prep ANN layers =============
        # K = number of distinct classes in Y
        K = len(set(Y))
        # list to store all the hidden layers
        self.hidden_layers = []
        # Output of last convpool layer feature output
        # This is to flatten the last convpool feature output as an input to the ANN
        M1 = self.convpool_layer_sizes[-1][
            0] * outw * outh  # size must be same as output of last convpool layer
        count = 0
        # Loop through the hidden layers in hidden_layer_sizes
        for M2 in self.hidden_layer_sizes:
            # Create hidden layer
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            # Set feature input to previous feature output
            # for the next loop
            M1 = M2
            count += 1
        # ============= Prep Log Regression layer =============
        W, b = init_weight_and_bias(M1, K)
        self.W = theano.shared(W, 'W_logreg')
        self.b = theano.shared(b, 'b_logreg')
        # ============= Collect parameters for SGD  =============
        self.params = [self.W, self.b]
        for c in self.convpool_layers:
            self.params += c.params
        for h in self.hidden_layers:
            self.params += h.params

        # momentum
        dparams = [
            theano.shared(np.zeros(p.get_value().shape, dtype=np.float32))
            for p in self.params
        ]
        # rmsprop
        cache = [
            theano.shared(np.zeros(p.get_value().shape, dtype=np.float32))
            for p in self.params
        ]
        # define theano variables - X and Y
        thX = T.tensor4('X', dtype='float32')
        thY = T.ivector('Y')
        # Probability of Y
        pY = self.forward(thX)
        # regularisation cost
        # rcost = reg_parameter*sum(each_parameter^2)
        rcost = reg * T.sum([(p * p).sum() for p in self.params])
        # cost = -mean(log probability of the correct class) + regularisation cost
        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
        # prediction
        prediction = self.th_predict(thX)

        # function to calculate the prediction cost without updates
        # used to calculate cost of prediction for the validation set
        cost_predict_op = theano.function(inputs=[thX, thY],
                                          outputs=[cost, prediction])

        # momentum updates
        # momentum only. Update params and dparams
        updates = [(p, p + mu * dp - lr * T.grad(cost, p))
                   for p, dp in zip(self.params, dparams)
                   ] + [(dp, mu * dp - lr * T.grad(cost, p))
                        for p, dp in zip(self.params, dparams)]

        train_op = theano.function(inputs=[thX, thY], updates=updates)

        n_batches = N // batch_sz
        costs = []
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]

                train_op(Xbatch, Ybatch)

                if j % 20 == 0:
                    c, p = cost_predict_op(Xvalid, Yvalid)
                    costs.append(c)
                    e = error_rate(Yvalid, p)
                    print("i:", i, "j:", j, "nb:", n_batches, "cost:", c,
                          "error rate:", e)

        if show_fig:
            plt.plot(costs)
            plt.savefig("cost.png")
    def fit(self, X, Y, Xvalid, Yvalid, lr=1e-3, mu=0.99, reg=1e-3, decay=0.99999, eps=1e-10, batch_sz=30, epochs=3, show_fig=True):
        # downcast
        lr = np.float32(lr)
        mu = np.float32(mu)
        reg = np.float32(reg)
        decay = np.float32(decay)
        eps = np.float32(eps)

        X = X.astype(np.float32)
        Xvalid = Xvalid.astype(np.float32)
        Y = Y.astype(np.int32)
        Yvalid = Yvalid.astype(np.int32)

        # initialize convpool layers
        N, c, width, height = X.shape
        mi = c
        outw = width
        outh = height
        self.convpool_layers = []
        for mo, fw, fh in self.convpool_layer_sizes:
            layer = ConvPoolLayer(mi, mo, fw, fh)
            self.convpool_layers.append(layer)
            outw = (outw - fw + 1) // 2
            outh = (outh - fh + 1) // 2
            mi = mo

        # initialize mlp layers
        K = len(set(Y))
        self.hidden_layers = []
        M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1

        # logistic regression layer
        W, b = init_weight_and_bias(M1, K)
        self.W = theano.shared(W, 'W_logreg')
        self.b = theano.shared(b, 'b_logreg')

        # collect params for later use
        self.params = [self.W, self.b]
        for c in self.convpool_layers:
            self.params += c.params
        for h in self.hidden_layers:
            self.params += h.params

        # set up theano functions and variables
        thX = T.tensor4('X', dtype='float32')
        thY = T.ivector('Y')
        pY = self.forward(thX)

        rcost = reg*T.sum([(p*p).sum() for p in self.params])
        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
        prediction = self.th_predict(thX)

        cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction])

        updates = rmsprop(cost, self.params, lr, mu, decay, eps)
        train_op = theano.function(
            inputs=[thX, thY],
            outputs=cost,
            updates=updates
        )

        n_batches = N // batch_sz
        costs = []
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

                train_c = train_op(Xbatch, Ybatch)

                if j % 20 == 0:
                    c, p = cost_predict_op(Xvalid, Yvalid)
                    costs.append(c)
                    e = error_rate(Yvalid, p)
                    print(
                        "i:", i,
                        "j:", j,
                        "nb:", n_batches,
                        "train cost:", train_c,
                        "cost:", c,
                        "error rate:", e
                    )

        if show_fig:
            plt.plot(costs)
            plt.show()
Example #18
    def fit(
            self, X, Y, lr=10e-4, mu=0.99, reg=10e-4, decay=0.99999,
            eps=10e-3, batch_sz=30, epochs=100, show_fig=True
    ):
        # convert all of the params to float32
        lr = np.float32(lr)
        mu = np.float32(mu)
        reg = np.float32(reg)
        decay = np.float32(decay)
        eps = np.float32(eps)
        # K are the unique values of Y (number of classes)
        K = len(set(Y))
        # ============= Prep Data =============
        # Validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = y2indicator(Y).astype(np.float32)
        # Valid set - last 1000 entries
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        # Training set - Everything except last 1000 entries
        X, Y = X[:-1000], Y[:-1000]
        # Flat version required, so that error can be calculated.
        Yvalid_flat = np.argmax(Yvalid, axis=1)
        # ============= Prep ConvPool layers =============
        # initialise convpool layers
        N, width, height, c_number = X.shape
        # input feature maps
        mi = c_number
        outw = width
        outh = height
        self.convpool_layers = []
        convpool_layer_count = 0
        # create convpool layers
        for mo, fw, fh in self.convpool_layer_sizes:
            layer = ConvPoolLayer(
                mi, mo, fw, fh,
                self.strides[convpool_layer_count],
                self.pool_sz[convpool_layer_count],
                self.pool_strides[convpool_layer_count]
            )
            self.convpool_layers.append(layer)
            outw = outw // self.pool_sz[convpool_layer_count][1]
            outh = outh // self.pool_sz[convpool_layer_count][2]
            mi = mo
            convpool_layer_count += 1
        # ============= Prep ANN layers =============
        # Hidden layers
        self.hidden_layers = []
        M1 = self.convpool_layer_sizes[-1][0]*outw*outh
        hidden_layer_count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, hidden_layer_count)
            self.hidden_layers.append(h)
            M1 = M2
            hidden_layer_count += 1
        # ============= prep log regression layer =============
        W, b = init_weight_and_bias(M1, K)
        self.W = tf.Variable(W, name='W_logreg')
        self.b = tf.Variable(b, name='b_logreg')
        # ============= collect params =============
        self.params = [self.W, self.b]
        # collect convpool
        for h in self.convpool_layers:
            self.params += h.params
        # collect hidden
        for h in self.hidden_layers:
            self.params += h.params
        # ============= init tensorflow variables =============
        tfX = tf.placeholder(tf.float32, shape=(None, width, height, c_number), name='X')
        tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y')
        # no softmax here; forward returns raw logits (the cost op applies softmax itself)
        act = self.forward(tfX)
        # reg cost - regularisation*sum of L2 loss for every parameter
        rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params])
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=act,
                labels=tfY
            )
        ) + rcost
        prediction = self.predict(tfX)
        # ============= init train function =============
        train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost)
        # calculate number of batches
        n_batches = N // batch_sz
        # initialise costs array
        costs = []
        init = tf.global_variables_initializer()
        # ============= init tf session =============
        with tf.Session() as session:
            session.run(init)
            for i in range(epochs):
                X, Y = shuffle(X, Y)
                for j in range(n_batches):
                    Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                    Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

                    session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch})

                    if j % 20 == 0:
                        # calculate costs
                        c_out = session.run(cost, feed_dict={tfX: Xvalid, tfY: Yvalid})
                        costs.append(c_out)
                        # calculate prediction
                        p = session.run(prediction, feed_dict={tfX: Xvalid, tfY: Yvalid})
                        # calculate error rate
                        e = error_rate(Yvalid_flat, p)
                        print("i:", i, "j:", j, "nb:", n_batches, "cost:", c_out, "error rate:", e)
        if show_fig:
            plt.plot(costs)
            plt.savefig("cost.png")
Example #19
    def fit(self,
            X,
            Y,
            lr=10e-5,
            mu=0.99,
            reg=10e-7,
            decay=0.99999,
            eps=10e-3,
            batch_sz=30,
            epochs=100,
            show_fig=True):
        lr = np.float32(lr)
        mu = np.float32(mu)
        reg = np.float32(reg)
        decay = np.float32(decay)
        eps = np.float32(eps)

        # make a validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = Y.astype(np.int32)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]

        # initialize convpool layers
        N, c, width, height = X.shape
        mi = c
        outw = width
        outh = height
        self.convpool_layers = []
        for mo, fw, fh in self.convpool_layer_sizes:
            layer = ConvPoolLayer(mi, mo, fw, fh)
            self.convpool_layers.append(layer)
            outw = (outw - fw + 1) // 2
            outh = (outh - fh + 1) // 2
            mi = mo

        # initialize mlp layers
        K = len(set(Y))
        self.hidden_layers = []
        M1 = self.convpool_layer_sizes[-1][
            0] * outw * outh  # size must be same as output of last convpool layer
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1

        # logistic regression layer
        W, b = init_weight_and_bias(M1, K)
        self.W = theano.shared(W, 'W_logreg')
        self.b = theano.shared(b, 'b_logreg')

        # collect params for later use
        self.params = [self.W, self.b]
        for c in self.convpool_layers:
            self.params += c.params
        for h in self.hidden_layers:
            self.params += h.params

        # for momentum
        dparams = [
            theano.shared(np.zeros(p.get_value().shape, dtype=np.float32))
            for p in self.params
        ]

        # for rmsprop
        cache = [
            theano.shared(np.zeros(p.get_value().shape, dtype=np.float32))
            for p in self.params
        ]

        # set up theano functions and variables
        thX = T.tensor4('X', dtype='float32')
        thY = T.ivector('Y')
        pY = self.forward(thX)

        rcost = reg * T.sum([(p * p).sum() for p in self.params])
        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
        prediction = self.th_predict(thX)

        cost_predict_op = theano.function(inputs=[thX, thY],
                                          outputs=[cost, prediction])

        # updates = [
        #     (c, decay*c + (np.float32(1)-decay)*T.grad(cost, p)*T.grad(cost, p)) for p, c in zip(self.params, cache)
        # ] + [
        #     (p, p + mu*dp - lr*T.grad(cost, p)/T.sqrt(c + eps)) for p, c, dp in zip(self.params, cache, dparams)
        # ] + [
        #     (dp, mu*dp - lr*T.grad(cost, p)/T.sqrt(c + eps)) for p, c, dp in zip(self.params, cache, dparams)
        # ]

        # momentum only
        updates = [(p, p + mu * dp - lr * T.grad(cost, p))
                   for p, dp in zip(self.params, dparams)
                   ] + [(dp, mu * dp - lr * T.grad(cost, p))
                        for p, dp in zip(self.params, dparams)]

        train_op = theano.function(inputs=[thX, thY], updates=updates)

        n_batches = N // batch_sz
        costs = []
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]

                train_op(Xbatch, Ybatch)

                if j % 20 == 0:
                    c, p = cost_predict_op(Xvalid, Yvalid)
                    costs.append(c)
                    e = error_rate(Yvalid, p)
                    print("i:", i, "j:", j, "nb:", n_batches, "cost:", c,
                          "error rate:", e)

        if show_fig:
            plt.plot(costs)
            plt.show()
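
M1 = self.convpool_layer_sizes[-1][0] * outw * outh is where the flattened output of the last convpool layer has to match the input of the first hidden layer. As a worked example, assuming 48x48 inputs and convpool_layer_sizes = [(20, 5, 5), (20, 5, 5)] with 2x2 pooling (these numbers are only an illustration):

outw = outh = 48
for mo, fw, fh in [(20, 5, 5), (20, 5, 5)]:
    outw = (outw - fw + 1) // 2   # 'valid' convolution, then 2x2 pooling
    outh = (outh - fh + 1) // 2
# first layer: (48 - 5 + 1) // 2 = 22, second layer: (22 - 5 + 1) // 2 = 9
M1 = 20 * outw * outh             # 20 * 9 * 9 = 1620 inputs to the first hidden layer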
Example #20
    def train(self,
              X,
              Y,
              learning_rate=10e-4,
              mu=.99,
              reg=10e-4,
              decay=0.9999,
              eps=10e-3,
              batch_sz=100,
              epochs=3,
              dispFig=True):
        print('Training model...')
        # TensorFlow expects all inputs to share the same dtype (float32)
        learning_rate = np.float32(learning_rate)
        mu = np.float32(mu)
        decay = np.float32(decay)
        eps = np.float32(eps)

        # input data should have shape (N, im_W, im_H, color_channels)
        X, Y = shuffle(X, Y)
        N, im_W, im_H, color_channels = X.shape
        K = len(np.unique(Y))  # number of classes
        # check if input truths are vector or one hot encoded
        if len(Y.shape) == 1 or Y.shape[1] != K:
            Y_ind = y2indicator(Y).astype(np.float32)
        else:
            Y_ind = Y
        X = X.astype(np.float32)  # just a precaution...

        # use 80% of the data for training, 20% for the validation set
        # initialize tensorflow var X with shape (NONE, w,h,color)
        numTrain = round(N * .8)
        numTest = round(N * .2)
        trainIdx = makeDiv(numTrain, batch_sz)
        validIdx = makeDiv(numTest, batch_sz)
        Xtrain = X[:trainIdx, ]
        Ytrain = Y_ind[:trainIdx, ]
        Xvalid = X[-validIdx:, ]
        Yvalid = Y_ind[-validIdx:, ]

        # init Convpool layers
        inputMap_sz = X.shape[-1]
        self.convpoolLayers = []
        outW = im_W
        outH = im_H
        for outMap, filter_W, filter_H in self.convpool_sz:
            self.convpoolLayers.append(
                Convpool(inputMap_sz, outMap, filter_W, filter_H))
            inputMap_sz = outMap
            outW = outW // 2
            outH = outH // 2

        # init MLP layers
        self.hiddenLayers = []
        hiddenInput_shp = inputMap_sz * outW * outH
        for m in self.hidden_sz:
            self.hiddenLayers.append(HiddenLayer(hiddenInput_shp, m))
            hiddenInput_shp = m
        V, c = init_weight_and_bias(hiddenInput_shp, K)
        self.V = tf.Variable(V)
        self.c = tf.Variable(c)

        # collect params for use in updates
        self.params = [self.V, self.c]
        for h in self.convpoolLayers:
            self.params += h.params
        for h in self.hiddenLayers:
            self.params += h.params

        tfX = tf.placeholder(tf.float32,
                             shape=(None, im_W, im_H, color_channels),
                             name='X')
        tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y')
        Z_logreg = self.forward(tfX)

        rcost = reg * sum([tf.nn.l2_loss(p)
                           for p in self.params])  # calculate l2 penalty
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=Z_logreg,
                                                    labels=tfY)) + rcost
        prediction = self.predict(tfX)

        train_op = tf.train.RMSPropOptimizer(learning_rate,
                                             decay=decay,
                                             momentum=mu).minimize(cost)

        n_batches = len(Xtrain) // batch_sz

        costs = []
        init = tf.global_variables_initializer()
        with tf.Session() as sess:
            sess.run(init)
            for i in range(epochs):
                Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
                for j in range(n_batches):
                    Xbatch = Xtrain[j * batch_sz:(j * batch_sz + batch_sz), ]
                    Ybatch = Ytrain[j * batch_sz:(j * batch_sz + batch_sz), ]

                    sess.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch})

                    if j % 10 == 0:
                        c = sess.run(cost,
                                     feed_dict={
                                         tfX: Xvalid,
                                         tfY: Yvalid
                                     })
                        costs.append(c)

                        p = sess.run(prediction,
                                     feed_dict={
                                         tfX: Xvalid,
                                         tfY: Yvalid
                                     })
                        e = error_rate(np.argmax(Yvalid, axis=1), p)
                        print('Epoch: {}\t batch: {}\t cost: {}\t error: {}'.
                              format(i, j, c, e))

        print('Final Accuracy: {}'.format(1 - e))
        if dispFig:
            plt.plot(costs)
            plt.xlabel('Epochs')
            plt.ylabel('Cost')
            plt.show()
        return costs, (1 - e)
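
train above slices the data with makeDiv(numTrain, batch_sz), a helper that is not shown. Presumably it rounds a count down to the nearest multiple of the batch size; a sketch under that assumption:

def makeDiv(n, batch_sz):
    # assumed behaviour: largest multiple of batch_sz that is <= n
    return (n // batch_sz) * batch_sz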
    def fit(self,
            X,
            Y,
            learning_rate=10e-4,
            mu=0.99,
            decay=0.999,
            reg=10e-3,
            epochs=400,
            batch_sz=128,
            show_fig=False):
        K = len(set(Y))

        # make a validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = y2indicator(Y).astype(np.float32)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        Yvalid_flat = np.argmax(Yvalid, axis=1)
        X, Y = X[:-1000], Y[:-1000]

        # initialize hidden layers
        N, D = X.shape
        self.hidden_layers = []
        M1 = D
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2  #output of last layer is input of next
            count += 1

        # initialize params of the output layer
        W, b = init_weight_and_bias(M1, K)
        self.W = tf.Variable(W.astype(np.float32))
        self.b = tf.Variable(b.astype(np.float32))

        self.params = [self.W, self.b]
        for h in self.hidden_layers:
            self.params += h.params

        tfX = tf.placeholder(tf.float32, shape=(None, D), name='X')
        tfT = tf.placeholder(tf.float32, shape=(None, K), name='T')
        act = self.forward(tfX)

        rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params])
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=act,
                                                    labels=tfT)) + rcost
        prediction = self.predict(tfX)
        train_op = tf.train.RMSPropOptimizer(learning_rate,
                                             decay=decay,
                                             momentum=mu).minimize(cost)

        n_batches = int(N / batch_sz)
        costs = []
        init = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init)
            for i in range(epochs):
                X, Y = shuffle(X, Y)
                for j in range(n_batches):
                    Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                    Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]

                    session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch})

                    if j % 20 == 0:
                        c = session.run(cost,
                                        feed_dict={
                                            tfX: Xvalid,
                                            tfT: Yvalid
                                        })
                        costs.append(c)

                        p = session.run(prediction,
                                        feed_dict={
                                            tfX: Xvalid,
                                            tfT: Yvalid
                                        })
                        e = error_rate(Yvalid_flat, p)
                        print("i:", i, "j:", j, "nb:", n_batches, "cost:", c,
                              "error_rate", e)

        if show_fig:
            plt.plot(costs)
            plt.show()
    def fit(self, X, Y, learning_rate=10e-7, mu=0.99, decay=0.999, reg=10e-12, eps=10e-10, epochs=400, batch_sz=100, show_fig=False):
        learning_rate = np.float32(learning_rate)
        mu = np.float32(mu)
        decay = np.float32(decay)
        reg = np.float32(reg)
        eps = np.float32(eps)

        # make a validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = Y.astype(np.int32)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]

        # initialize hidden layers
        N, D = X.shape
        K = len(set(Y))
        self.hidden_layers = []
        M1 = D
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1
        W, b = init_weight_and_bias(M1, K)
        self.W = theano.shared(W, 'W_logreg')
        self.b = theano.shared(b, 'b_logreg')

        # collect params for later use
        self.params = [self.W, self.b]
        for h in self.hidden_layers:
            self.params += h.params

        # for momentum
        dparams = [theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params]

        # for rmsprop
        cache = [theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params]

        # set up theano functions and variables
        thX = T.fmatrix('X')
        thY = T.ivector('Y')
        pY = self.forward(thX)

        rcost = reg*T.sum([(p*p).sum() for p in self.params])
        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
        prediction = self.predict(thX)

        cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction])

        updates = [
            (c, decay*c + (np.float32(1)-decay)*T.grad(cost, p)*T.grad(cost, p)) for p, c in zip(self.params, cache)
        ] + [
            (p, p + mu*dp - learning_rate*T.grad(cost, p)/T.sqrt(c + eps)) for p, c, dp in zip(self.params, cache, dparams)
        ] + [
            (dp, mu*dp - learning_rate*T.grad(cost, p)/T.sqrt(c + eps)) for p, c, dp in zip(self.params, cache, dparams)
        ]

        # momentum only
        # updates = [
        #     (p, p + mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, dparams)
        # ] + [
        #     (dp, mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, dparams)
        # ]

        train_op = theano.function(
            inputs=[thX, thY],
            updates=updates
        )

        n_batches = N // batch_sz
        costs = []
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

                train_op(Xbatch, Ybatch)

                if j % 20 == 0:
                    c, p = cost_predict_op(Xvalid, Yvalid)
                    costs.append(c)
                    e = error_rate(Yvalid, p)
                    print "i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e
        
        if show_fig:
            plt.plot(costs)
            plt.show()
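
This Theano variant likewise assumes self.forward and self.predict graph builders on the class. A minimal sketch, assuming each HiddenLayer exposes a forward method and the output layer is a plain softmax:

    def forward(self, X):
        Z = X
        for h in self.hidden_layers:
            Z = h.forward(Z)
        # softmax output layer on top of the last hidden layer
        return T.nnet.softmax(Z.dot(self.W) + self.b)

    def predict(self, X):
        pY = self.forward(X)
        return T.argmax(pY, axis=1)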
Example #23
    def fit(self,
            X,
            Y,
            Xvalid,
            Yvalid,
            learning_rate=1e-3,
            mu=0.99,
            decay=0.999,
            reg=1e-3,
            epoches=10,
            batch_sz=100,
            show_fig=False):
        # step1. get data
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = y2indicator(Y).astype(np.int32)
        Xvalid = Xvalid.astype(np.float32)
        Yvalid_vector = Yvalid.astype(np.int32)
        Yvalid = y2indicator(Yvalid).astype(np.int32)

        # step1.1 initialize each layer and parameters(with tf.Variable) of NN and keep them in a list
        N, D = X.shape
        M1 = D
        K = Y.shape[1]
        self.hidden_layers = []  # for saving HiddenLayer object
        count = 0
        for M2 in self.hidden_layer_size:  # build a HiddenLayer object for every hidden layer except the final output layer
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1

        W, b = init_weight_and_bias(M1, K)  # weights of the final output layer
        self.W = tf.Variable(W.astype(np.float32))
        self.b = tf.Variable(b.astype(np.float32))

        # collect all the parameters we are going to update with gradient descent
        self.params = [self.W, self.b]
        for layer in self.hidden_layers:
            self.params += layer.params

        # step1.2 tf.placeholder
        tfX = tf.placeholder(tf.float32, shape=(None, D), name="X")
        tfT = tf.placeholder(tf.float32, shape=(None, K), name="T")

        # step2. model
        act = self.forward(tfX)  # no softmax (and no other activation) at the end; TF's cross-entropy op expects raw logits

        # step3. cost function
        rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params])
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=act,
                                                       labels=tfT)) + rcost

        prediction_op = self.predict(tfX)

        # step4. solver
        train_op = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                             momentum=mu,
                                             decay=decay).minimize(cost)

        init = tf.global_variables_initializer()

        n_batches = N // batch_sz
        costs = []
        with tf.Session() as sess:
            sess.run(init)

            for i in range(epoches):
                for j in range(n_batches):
                    Xbatch = X[j * batch_sz:(j + 1) * batch_sz, ]
                    Ybatch = Y[j * batch_sz:(j + 1) * batch_sz, ]

                    sess.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch})
                    if j % 50 == 0:
                        cost_val = sess.run(cost,
                                            feed_dict={
                                                tfX: Xvalid,
                                                tfT: Yvalid
                                            })
                        costs.append(cost_val)
                        preds = sess.run(prediction_op,
                                         feed_dict={tfX: Xvalid})
                        err = error_rate(Yvalid_vector, preds)
                        print("i:", i, "j:", j, "nb:", n_batches, "cost:",
                              cost_val, "error rate:", err)

        if show_fig:
            plt.plot(costs)
            plt.show()
Example #24
    def fit(self,
            X,
            Y,
            learning_rate=1e-2,
            mu=0.99,
            decay=0.999,
            reg=1e-3,
            epochs=10,
            batch_sz=100,
            show_fig=False):
        K = len(set(Y))  # won't work later b/c we turn it into indicator

        # make a validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = y2indicator(Y).astype(np.float32)
        # Y = Y.astype(np.int32)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        Yvalid_flat = np.argmax(Yvalid, axis=1)  # for calculating error rate
        X, Y = X[:-1000], Y[:-1000]

        # initialize hidden layers
        N, D = X.shape

        self.hidden_layers = []
        M1 = D
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1
        W, b = init_weight_and_bias(M1, K)
        self.W = tf.Variable(W.astype(np.float32))
        self.b = tf.Variable(b.astype(np.float32))

        # collect params for later use
        self.params = [self.W, self.b]
        for h in self.hidden_layers:
            self.params += h.params

        # set up theano functions and variables
        tfX = tf.placeholder(tf.float32, shape=(None, D), name='X')
        tfT = tf.placeholder(tf.float32, shape=(None, K), name='T')
        act = self.forward(tfX)

        rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params])
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=act,
                                                    labels=tfT)) + rcost
        prediction = self.predict(tfX)
        train_op = tf.train.RMSPropOptimizer(learning_rate,
                                             decay=decay,
                                             momentum=mu).minimize(cost)

        n_batches = N // batch_sz
        costs = []
        init = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init)
            for i in range(epochs):
                X, Y = shuffle(X, Y)
                for j in range(n_batches):
                    Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                    Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]

                    session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch})

                    if j % 20 == 0:
                        c = session.run(cost,
                                        feed_dict={
                                            tfX: Xvalid,
                                            tfT: Yvalid
                                        })
                        costs.append(c)

                        p = session.run(prediction,
                                        feed_dict={
                                            tfX: Xvalid,
                                            tfT: Yvalid
                                        })
                        e = error_rate(Yvalid_flat, p)
                        print("i:", i, "j:", j, "nb:", n_batches, "cost:", c,
                              "error rate:", e)

        # TODO: ask lazy programmer how to make a score function.
        # For this lecture: https://www.udemy.com/data-science-deep-learning-in-theano-tensorflow/learn/v4/t/lecture/5228492?start=0

        if show_fig:
            plt.plot(costs)
            plt.show()
    def fit(self, X, Y, lr=10e-4, mu=0.99, reg=10e-4, decay=0.99999, eps=10e-3, batch_sz=30, epochs=3, show_fig=True):
        lr = np.float32(lr)
        mu = np.float32(mu)
        reg = np.float32(reg)
        decay = np.float32(decay)
        eps = np.float32(eps)
        K = len(set(Y))

        # make a validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = y2indicator(Y).astype(np.float32)

        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]
        Yvalid_flat = np.argmax(Yvalid, axis=1) # for calculating error rate

        # initialize convpool layers
        N, d, d, c = X.shape
        mi = c
        outw = d
        outh = d
        self.convpool_layers = []
        for mo, fw, fh in self.convpool_layer_sizes:
            layer = ConvPoolLayer(mi, mo, fw, fh)
            self.convpool_layers.append(layer)
            outw = outw // 2
            outh = outh // 2
            mi = mo

        # initialize mlp layers
        self.hidden_layers = []
        M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1

        # logistic regression layer
        W, b = init_weight_and_bias(M1, K)
        self.W = tf.Variable(W, name='W_logreg')
        self.b = tf.Variable(b, name='b_logreg')

        # collect params for later use
        self.params = [self.W, self.b]
        for h in self.convpool_layers:
            self.params += h.params
        for h in self.hidden_layers:
            self.params += h.params

        # set up tensorflow functions and variables
        tfX = tf.placeholder(tf.float32, shape=(None, d, d, c), name='X')
        tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y')
        act = self.forward(tfX)

        rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params])
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=act, labels=tfY)) + rcost
        prediction = self.predict(tfX)

        train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost)

        n_batches = N // batch_sz
        costs = []
        init = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init)
            for i in range(epochs):
                X, Y = shuffle(X, Y)
                for j in range(n_batches):
                    Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                    Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

                    session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch})

                    if j % 20 == 0:
                        c = session.run(cost, feed_dict={tfX: Xvalid, tfY: Yvalid})
                        costs.append(c)

                        p = session.run(prediction, feed_dict={tfX: Xvalid, tfY: Yvalid})
                        e = error_rate(Yvalid_flat, p)
                        print "i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e

        if show_fig:
            plt.plot(costs)
            plt.show()
    def fit(self,
            X,
            Y,
            lr=10e-4,
            mu=0.99,
            reg=10e-4,
            decay=0.99999,
            eps=10e-3,
            batch_sz=30,
            epochs=3,
            show_fig=True):
        lr = np.float32(lr)
        mu = np.float32(mu)
        reg = np.float32(reg)
        decay = np.float32(decay)
        eps = np.float32(eps)
        K = len(set(Y))

        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = y2indicator(Y).astype(np.float32)

        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]
        Yvalid_flat = np.argmax(Yvalid, axis=1)

        N, d, d, c = X.shape
        mi = c
        outw = d
        outh = d
        self.convpool_layers = []
        for mo, fw, fh in self.convpool_layer_sizes:
            layer = ConvPoolLayer(mi, mo, fw, fh)
            self.convpool_layers.append(layer)
            outw = outw / 2
            outh = outh / 2
            mi = mo

        self.hidden_layers = []
        M1 = int(self.convpool_layer_sizes[-1][0] * outw * outh)
        count = 0
        for M2 in self.hidden_layer_sizes:
            print(M1, M2)
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1
        W, b = init_weight_and_bias(M1, K)
        self.W = tf.Variable(W, name='W_logreg')
        self.b = tf.Variable(b, name='b_log')

        self.params = [self.W, self.b]
        for h in self.convpool_layers:
            self.params += h.params
        for h in self.hidden_layers:
            self.params += h.params

        tfX = tf.placeholder(tf.float32, shape=(None, d, d, c), name='X')
        tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y')
        act = self.forward(tfX)

        rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params])
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=act,
                                                    labels=tfY)) + rcost
        prediction = self.predict(tfX)

        train_op = tf.train.RMSPropOptimizer(lr, decay=decay,
                                             momentum=mu).minimize(cost)

        n_batches = N // batch_sz
        costs = []
        init = tf.global_variables_initializer()

        with tf.Session() as session:
            session.run(init)
            for i in range(epochs):
                X, Y = shuffle(X, Y)
                for j in range(n_batches):
                    Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                    Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]

                    session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch})

                    if j % 20 == 0:
                        c = session.run(cost,
                                        feed_dict={
                                            tfX: Xvalid,
                                            tfY: Yvalid
                                        })
                        costs.append(c)

                        p = session.run(prediction,
                                        feed_dict={
                                            tfX: Xvalid,
                                            tfY: Yvalid
                                        })
                        e = error_rate(Yvalid_flat, p)
                        print("i:", i, "j:", j, "nb:", n_batches, "cost:", c,
                              "error_rate:", e)

        if show_fig:
            plt.plot(costs)
            plt.show()
    def fit(self, X, Y, learning_rate=1e-2, mu=0.99, decay=0.999, reg=1e-3, epochs=10, batch_sz=100, show_fig=False):
        K = len(set(Y)) # won't work later b/c we turn it into indicator

        # make a validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = y2indicator(Y).astype(np.float32)
        # Y = Y.astype(np.int32)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        Yvalid_flat = np.argmax(Yvalid, axis=1) # for calculating error rate
        X, Y = X[:-1000], Y[:-1000]

        # initialize hidden layers
        N, D = X.shape
        
        self.hidden_layers = []
        M1 = D
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1
        W, b = init_weight_and_bias(M1, K)
        self.W = tf.Variable(W.astype(np.float32))
        self.b = tf.Variable(b.astype(np.float32))

        # collect params for later use
        self.params = [self.W, self.b]
        for h in self.hidden_layers:
            self.params += h.params

        # set up tensorflow functions and variables
        tfX = tf.placeholder(tf.float32, shape=(None, D), name='X')
        tfT = tf.placeholder(tf.float32, shape=(None, K), name='T')
        act = self.forward(tfX)

        rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params])
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=act,
                labels=tfT
            )
        ) + rcost
        prediction = self.predict(tfX)
        train_op = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu).minimize(cost)

        n_batches = N // batch_sz
        costs = []
        init = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init)
            for i in range(epochs):
                X, Y = shuffle(X, Y)
                for j in range(n_batches):
                    Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                    Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

                    session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch})

                    if j % 20 == 0:
                        c = session.run(cost, feed_dict={tfX: Xvalid, tfT: Yvalid})
                        costs.append(c)

                        p = session.run(prediction, feed_dict={tfX: Xvalid, tfT: Yvalid})
                        e = error_rate(Yvalid_flat, p)
                        print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e)
        
        if show_fig:
            plt.plot(costs)
            plt.show()
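The fit methods above and below all lean on a few shared helpers (init_weight_and_bias, y2indicator, error_rate). A minimal sketch of how they might be defined, inferred only from how they are called in these listings; the exact weight scaling and dtypes are assumptions:

import numpy as np

def init_weight_and_bias(M1, M2):
    # fan-in scaled Gaussian weights and a zero bias (scaling factor is an assumption)
    W = np.random.randn(M1, M2) / np.sqrt(M1)
    b = np.zeros(M2)
    return W.astype(np.float32), b.astype(np.float32)

def y2indicator(y):
    # integer labels (N,) -> one-hot indicator matrix (N, K)
    N = len(y)
    K = len(set(y))
    ind = np.zeros((N, K), dtype=np.float32)
    ind[np.arange(N), y] = 1
    return ind

def error_rate(targets, predictions):
    # fraction of misclassified samples
    return np.mean(targets != predictions)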
Ejemplo n.º 28
0
    def fit(self,
            X,
            Y,
            learning_rate=10e-7,
            mu=0.99,
            decay=0.999,
            reg=10e-12,
            eps=10e-10,
            epochs=400,
            batch_sz=100,
            show_fig=False):
        learning_rate = np.float32(learning_rate)
        mu = np.float32(mu)
        decay = np.float32(decay)
        reg = np.float32(reg)
        eps = np.float32(eps)

        # make a validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = Y.astype(np.int32)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]

        # initialize hidden layers
        N, D = X.shape
        K = len(set(Y))
        self.hidden_layers = []
        M1 = D
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1
        W, b = init_weight_and_bias(M1, K)
        self.W = theano.shared(W, 'W_logreg')
        self.b = theano.shared(b, 'b_logreg')

        # collect params for later use
        self.params = [self.W, self.b]
        for h in self.hidden_layers:
            self.params += h.params

        # for momentum
        dparams = [
            theano.shared(np.zeros(p.get_value().shape, dtype=np.float32))
            for p in self.params
        ]

        # for rmsprop
        cache = [
            theano.shared(np.zeros(p.get_value().shape, dtype=np.float32))
            for p in self.params
        ]

        # set up theano functions and variables
        thX = T.fmatrix('X')
        thY = T.ivector('Y')
        pY = self.th_forward(thX)

        rcost = reg * T.sum([(p * p).sum() for p in self.params])
        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
        prediction = self.th_predict(thX)

        # actual prediction function
        self.predict_op = theano.function(inputs=[thX], outputs=prediction)
        cost_predict_op = theano.function(inputs=[thX, thY],
                                          outputs=[cost, prediction])

        updates = [
            (c, decay * c +
             (np.float32(1) - decay) * T.grad(cost, p) * T.grad(cost, p))
            for p, c in zip(self.params, cache)
        ] + [
            (p,
             p + mu * dp - learning_rate * T.grad(cost, p) / T.sqrt(c + eps))
            for p, c, dp in zip(self.params, cache, dparams)
        ] + [(dp, mu * dp - learning_rate * T.grad(cost, p) / T.sqrt(c + eps))
             for p, c, dp in zip(self.params, cache, dparams)]

        # momentum only
        # updates = [
        #     (p, p + mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, dparams)
        # ] + [
        #     (dp, mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, dparams)
        # ]

        train_op = theano.function(inputs=[thX, thY], updates=updates)

        n_batches = N // batch_sz
        costs = []
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]

                train_op(Xbatch, Ybatch)

                if j % 20 == 0:
                    c, p = cost_predict_op(Xvalid, Yvalid)
                    costs.append(c)
                    e = error_rate(Yvalid, p)
                    print("i:", i, "j:", j, "nb:", n_batches, "cost:", c,
                          "error rate:", e)

        if show_fig:
            plt.plot(costs)
            plt.show()
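Later listings call a reusable rmsprop(cost, params, lr, mu, decay, eps) helper instead of spelling the updates out as above. A sketch of such a helper, mirroring the cache/velocity updates in the previous listing; using the freshly updated cache inside the parameter step is a common variant and an assumption here:

import numpy as np
import theano
import theano.tensor as T

def rmsprop(cost, params, lr, mu, decay, eps):
    grads = T.grad(cost, params)
    updates = []
    for p, g in zip(params, grads):
        # running average of the squared gradient (the `cache` above) and momentum velocity (`dparams`)
        c = theano.shared(np.zeros(p.get_value().shape, dtype=np.float32))
        dp = theano.shared(np.zeros(p.get_value().shape, dtype=np.float32))
        new_c = decay * c + (np.float32(1) - decay) * g * g
        new_dp = mu * dp - lr * g / T.sqrt(new_c + eps)
        updates.append((c, new_c))
        updates.append((dp, new_dp))
        updates.append((p, p + new_dp))
    return updates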
Ejemplo n.º 29
0
	def fit(self, X, Y, lr=1e-3, mu=0.99, reg=1e-3, decay=0.99999, eps=1e-10, batch_size=30, epochs=10, display_cost=False):
		lr = np.float32(lr)
		mu = np.float32(mu)
		reg = np.float32(reg)
		decay = np.float32(decay)
		eps = np.float32(eps)

		X, Y = shuffle(X, Y)
		X = X.astype(np.float32)
		Y = Y.astype(np.int32)

		# create a validation set:
		Xvalid, Yvalid = X[-1000:,], Y[-1000:]
		X, Y = X[:-1000,], Y[:-1000]

		# initialize convpool layers:
		N, c, height, width = X.shape
		mi = c 
		outh = height
		outw = width
		self.convpool_layers = []
		for mo, fh, fw in self.convpool_layer_sizes:
			layer = ConvPoolLayer(mi, mo, fh, fw)
			self.convpool_layers.append(layer)
			# output volume height and width 
			# after the current convpool layer:
			outh = (outh - fh + 1) // 2
			outw = (outw - fw + 1) // 2
			mi = mo

		# initialize mlp layers:
		K = len(set(Y))
		self.hidden_layers = []
		# size must be the same as output of last convpool layer:
		M1 = self.convpool_layer_sizes[-1][0]*outh*outw
		count = 0 # will be used to id hidden layers
		for M2 in self.hidden_layer_sizes:
			h = HiddenLayer(M1, M2, count)
			self.hidden_layers.append(h)
			M1 = M2
			count += 1

		# the last layer - softmax output:
		W, b = init_weight_and_bias(M1, K)
		self.W = theano.shared(W, 'W_output')
		self.b = theano.shared(b, 'b_output')

		# collect params:
		self.params = []
		for layer in self.convpool_layers:
			self.params += layer.params
		for h_layer in self.hidden_layers:
			self.params += h_layer.params
		self.params += [self.W, self.b]

		# set up theano functions and variables:
		thX = T.tensor4('X', dtype='float32')
		thY = T.ivector('Y')
		pY = self.forward(thX) # the forward func will be defined

		cost = -T.mean(T.log(pY[T.arange(pY.shape[0]), thY]))
		# add the regularization term to the cost:
		reg_term = reg*T.sum([(p*p).sum() for p in self.params])
		cost += reg_term

		prediction = self.th_predict(thX)

		# theano function to make the actual calculation of cost
		# and get the prediction:
		cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction])

		updates = rmsprop(cost, self.params, lr, mu, decay, eps)
		train_op = theano.function(
			inputs=[thX, thY], 
			updates=updates,
			outputs=cost, 
		)

		# the training loop:
		n_batches = N // batch_size
		train_costs = []
		valid_costs = []
		t0 = datetime.now()
		for i in range(epochs):
			X, Y = shuffle(X, Y)
			for j in range(n_batches):
				Xbatch = X[j*batch_size:(j+1)*batch_size, :]
				Ybatch = Y[j*batch_size:(j+1)*batch_size]

				train_cost = train_op(Xbatch, Ybatch)
				train_costs.append(train_cost)
				if j % 20 == 0:
					cost_val, prediction_val = cost_predict_op(Xvalid, Yvalid)
					error = error_rate(prediction_val, Yvalid)
					print('\ni: %d,  j: %d,  valid_cost: %.3f,  error: %.3f' % (i, j, cost_val, error))
					valid_costs.append(cost_val)

		print('\nElapsed time: ', datetime.now() - t0)
			
		if display_cost:
			plt.plot(train_costs)
			plt.title('Cost on Training Set')
			plt.xlabel('iterations')
			plt.show()

			plt.plot(valid_costs)
			plt.title('Cost on Validation Set')
			plt.xlabel('iterations')
			plt.show()
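The Theano CNN listings assume a ConvPoolLayer whose output shrinks as (size - filter + 1) // 2, i.e. a 'valid' convolution followed by 2x2 max-pooling. A sketch under that assumption, with the argument order of the listing above; the tanh nonlinearity and the ws= keyword (older Theano releases spell it ds=) are also assumptions:

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv2d
from theano.tensor.signal.pool import pool_2d

class ConvPoolLayer(object):
    def __init__(self, mi, mo, fh, fw, poolsz=(2, 2)):
        # filter shape: (output feature maps, input feature maps, filter height, filter width)
        W = (np.random.randn(mo, mi, fh, fw) / np.sqrt(mi * fh * fw)).astype(np.float32)
        b = np.zeros(mo, dtype=np.float32)
        self.W = theano.shared(W)
        self.b = theano.shared(b)
        self.poolsz = poolsz
        self.params = [self.W, self.b]

    def forward(self, X):
        conv_out = conv2d(input=X, filters=self.W)  # 'valid' convolution
        pooled = pool_2d(conv_out, ws=self.poolsz, ignore_border=True)  # 2x2 max-pooling
        return T.tanh(pooled + self.b.dimshuffle('x', 0, 'x', 'x'))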
Ejemplo n.º 30
0
    def fit(self,
            X,
            Y,
            Xvalid,
            Yvalid,
            learning_rate=1e-2,
            mu=0.99,
            decay=0.999,
            reg=1e-3,
            epochs=10,
            batch_sz=100,
            show_fig=False):
        K = len(set(Y))

        #make a validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = y2indicator(Y).astype(np.float32)

        #for calculating error rate
        Yvalid_flat = Yvalid
        Yvalid = y2indicator(Yvalid).astype(np.float32)

        #initialize hidden layers
        N, D = X.shape

        self.hidden_layers = []
        M1 = D
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1

        W, b = init_weight_and_bias(M1, K)
        self.W = tf.Variable(W.astype(np.float32))
        self.b = tf.Variable(b.astype(np.float32))

        #collect params for later use
        self.params = [self.W, self.b]
        for h in self.hidden_layers:
            self.params += h.params

        #set up tensorflow functions and variables
        tfX = tf.placeholder(tf.float32, shape=(None, D), name='X')
        tfT = tf.placeholder(tf.float32, shape=(None, K), name='T')
        act = self.forward(tfX)

        rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params])
        cost = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits(logits=act,
                                                    labels=tfT)) + rcost
        prediction = self.predict(tfX)
        train_op = tf.train.RMSPropOptimizer(learning_rate,
                                             decay=decay,
                                             momentum=mu).minimize(cost)

        n_batches = N // batch_sz
        costs = []
        init = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init)
            for i in range(epochs):
                X, Y = shuffle(X, Y)
                for j in range(n_batches):
                    Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                    Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]

                    session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch})

                    if j % 20 == 0:
                        c = session.run(cost,
                                        feed_dict={
                                            tfX: Xvalid,
                                            tfT: Yvalid
                                        })
                        costs.append(c)

                        p = session.run(prediction,
                                        feed_dict={
                                            tfX: Xvalid,
                                            tfT: Yvalid
                                        })
                        e = error_rate(Yvalid_flat, p)
                        print("i:", i, "j:", j, "nb:", n_batches, "cost:", c,
                              "error rate:", e)

        if show_fig:
            plt.plot(costs)
            plt.show()
    def fit(self,
            X,
            Y,
            Xvalid,
            Yvalid,
            learning_rate=1e-2,
            mu=0.99,
            decay=0.999,
            reg=1e-3,
            eps=1e-8,
            epochs=10,
            batch_sz=100,
            show_fig=False):
        # downcast
        learning_rate = np.float32(learning_rate)
        mu = np.float32(mu)
        decay = np.float32(decay)
        reg = np.float32(reg)
        eps = np.float32(eps)

        X = X.astype(np.float32)
        Xvalid = Xvalid.astype(np.float32)
        Y = Y.astype(np.int32)
        Yvalid = Yvalid.astype(np.int32)

        # initialize hidden layers
        N, D = X.shape
        K = len(set(Y))
        self.hidden_layers = []
        M1 = D
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1
        W, b = init_weight_and_bias(M1, K)
        self.W = theano.shared(W, 'W_logreg')
        self.b = theano.shared(b, 'b_logreg')

        # collect params for later use
        self.params = [self.W, self.b]
        for h in self.hidden_layers:
            self.params += h.params

        # set up theano functions and variables
        thX = T.fmatrix('X')
        thY = T.ivector('Y')
        pY = self.th_forward(thX)

        rcost = reg * T.sum([(p * p).sum() for p in self.params])
        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
        prediction = self.th_predict(thX)

        # actual prediction function
        self.predict_op = theano.function(inputs=[thX], outputs=prediction)
        cost_predict_op = theano.function(inputs=[thX, thY],
                                          outputs=[cost, prediction])

        updates = rmsprop(cost, self.params, learning_rate, mu, decay, eps)
        train_op = theano.function(inputs=[thX, thY], updates=updates)

        n_batches = N // batch_sz
        costs = []
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]

                train_op(Xbatch, Ybatch)

                if j % 20 == 0:
                    c, p = cost_predict_op(Xvalid, Yvalid)
                    costs.append(c)
                    e = error_rate(Yvalid, p)
                    print("i:", i, "j:", j, "nb:", n_batches, "cost:", c,
                          "error rate:", e)

        if show_fig:
            plt.plot(costs)
            plt.show()
    def fit(self, X, Y, lr=10e-5, mu=0.99, reg=10e-7, decay=0.99999, eps=10e-3, batch_sz=30, epochs=100, show_fig=True):
        lr = np.float32(lr)
        mu = np.float32(mu)
        reg = np.float32(reg)
        decay = np.float32(decay)
        eps = np.float32(eps)

        # make a validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = Y.astype(np.int32)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]

        # initialize convpool layers
        N, c, d, d = X.shape
        mi = c
        outw = d
        outh = d
        self.convpool_layers = []
        for mo, fw, fh in self.convpool_layer_sizes:
            layer = ConvPoolLayer(mi, mo, fw, fh)
            self.convpool_layers.append(layer)
            outw = (outw - fw + 1) // 2
            outh = (outh - fh + 1) // 2
            mi = mo

        # initialize mlp layers
        K = len(set(Y))
        self.hidden_layers = []
        M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1

        # logistic regression layer
        W, b = init_weight_and_bias(M1, K)
        self.W = theano.shared(W, 'W_logreg')
        self.b = theano.shared(b, 'b_logreg')

        # collect params for later use
        self.params = [self.W, self.b]
        for c in self.convpool_layers:
            self.params += c.params
        for h in self.hidden_layers:
            self.params += h.params

        # for momentum
        dparams = [theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params]

        # for rmsprop
        cache = [theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params]

        # set up theano functions and variables
        thX = T.tensor4('X', dtype='float32')
        thY = T.ivector('Y')
        pY = self.forward(thX)

        rcost = reg*T.sum([(p*p).sum() for p in self.params])
        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
        prediction = self.predict(thX)

        cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction])

        # updates = [
        #     (c, decay*c + (np.float32(1)-decay)*T.grad(cost, p)*T.grad(cost, p)) for p, c in zip(self.params, cache)
        # ] + [
        #     (p, p + mu*dp - lr*T.grad(cost, p)/T.sqrt(c + eps)) for p, c, dp in zip(self.params, cache, dparams)
        # ] + [
        #     (dp, mu*dp - lr*T.grad(cost, p)/T.sqrt(c + eps)) for p, c, dp in zip(self.params, cache, dparams)
        # ]

        # momentum only
        updates = [
            (p, p + mu*dp - lr*T.grad(cost, p)) for p, dp in zip(self.params, dparams)
        ] + [
            (dp, mu*dp - lr*T.grad(cost, p)) for p, dp in zip(self.params, dparams)
        ]

        train_op = theano.function(
            inputs=[thX, thY],
            updates=updates
        )

        n_batches = N // batch_sz
        costs = []
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

                train_op(Xbatch, Ybatch)

                if j % 20 == 0:
                    c, p = cost_predict_op(Xvalid, Yvalid)
                    costs.append(c)
                    e = error_rate(Yvalid, p)
                    print "i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e

        if show_fig:
            plt.plot(costs)
            plt.show()
Ejemplo n.º 33
0
    def fit(self,
            X,
            Y,
            lr=1e-3,
            mu=0.99,
            reg=10e-4,
            decay=0.99999,
            eps=10e-3,
            batch_sz=30,
            epochs=3,
            show_fig=True):
        lr = np.float32(lr)
        mu = np.float32(mu)
        reg = np.float32(reg)
        decay = np.float32(decay)
        eps = np.float32(eps)
        K = len(set(Y))  #Unique values in Y

        #Creating Validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = y2indicator(Y).astype(np.float32)

        Xvalid, Yvalid = X[-1000:], Y[-1000:]  #Last 1000 samples
        X, Y = X[:-1000], Y[:-1000]  #Set X, Y to the remaining samples
        Yvalid_flat = np.argmax(Yvalid, axis=1)  #For error calculation

        #Initialize ConvPool layer
        N, d, d, c = X.shape
        mi = c  #Input feature map = color
        outw = d
        outh = d
        self.convpool_layers = []  #Save convool layers in a list
        for mo, fw, fh in self.convpool_layer_sizes:
            layer = ConvPoolLayer(mi, mo, fw, fh)  #Initialize layer
            self.convpool_layers.append(layer)
            outw = outw // 2  #Integer-divide by 2 because of the pooling layer
            outh = outh // 2
            mi = mo

        #Initialize Hidden layers
        self.hidden_layers = []
        M1 = self.convpool_layer_sizes[-1][0] * outw * outh
        count = 0  #Layer id passed to each hidden layer
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count = count + 1

        #Initialize Logistic Regression
        W, b = init_weight_and_bias(M1, K)
        self.W = tf.Variable(W, name='W_logreg')
        self.b = tf.Variable(b, name='b_logreg')

        self.params = [self.W, self.b]
        for h in self.convpool_layers:
            self.params = self.params + h.params

        for h in self.hidden_layers:
            self.params = self.params + h.params

        #Define TensorFlow functions and Variables
        tfX = tf.placeholder(tf.float32, shape=(None, d, d, c))
        tfY = tf.placeholder(tf.float32, shape=(None, K))
        act = self.forward(tfX)

        #Calculate Regularization Cost

        rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params])

        #Calculate Final Cost
        #Activation, Indicator Matrix of targets
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=act,
                                                    labels=tfY)) + rcost

        #Calculate prediction

        prediction = self.predict(tfX)

        #Define train function

        # train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost)
        train_op = tf.train.AdamOptimizer(lr).minimize(cost)
        #Calculate No of batches
        n_batches = N // batch_sz

        #Initialize cost array
        costs = []

        #Initialize all variables

        init = tf.global_variables_initializer()

        with tf.Session() as session:
            session.run(init)

            for i in range(epochs):
                X, Y = shuffle(X, Y)
                for j in range(n_batches):
                    Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                    Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]

                    session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch})

                    if j % 20 == 0:
                        c = session.run(cost,
                                        feed_dict={
                                            tfX: Xvalid,
                                            tfY: Yvalid
                                        })
                        costs.append(c)

                        #Calculate prediction
                        p = session.run(prediction,
                                        feed_dict={
                                            tfX: Xvalid,
                                            tfY: Yvalid
                                        })

                        #Calculate error rate
                        e = error_rate(Yvalid_flat, p)
                        print('i', i, 'j', j, 'n_batches', n_batches, 'cost',
                              c, 'error_rate', e)

        if show_fig:
            plt.plot(costs)
            plt.show()
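The TensorFlow CNN listings simply halve outw and outh per layer, which is consistent with a 'SAME'-padded convolution followed by 2x2 max-pooling with stride 2. A sketch of such a layer; the He-style initialisation and the ReLU are assumptions:

import numpy as np
import tensorflow as tf

class ConvPoolLayer(object):
    def __init__(self, mi, mo, fw, fh):
        # tf.nn.conv2d expects filters shaped (filter height, filter width, input maps, output maps)
        shape = (fh, fw, mi, mo)
        W0 = (np.random.randn(*shape) * np.sqrt(2.0 / np.prod(shape[:-1]))).astype(np.float32)
        b0 = np.zeros(mo, dtype=np.float32)
        self.W = tf.Variable(W0)
        self.b = tf.Variable(b0)
        self.params = [self.W, self.b]

    def forward(self, X):
        conv_out = tf.nn.conv2d(X, self.W, strides=[1, 1, 1, 1], padding='SAME')
        conv_out = tf.nn.bias_add(conv_out, self.b)
        pool_out = tf.nn.max_pool(conv_out, ksize=[1, 2, 2, 1],
                                  strides=[1, 2, 2, 1], padding='SAME')
        return tf.nn.relu(pool_out)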
Ejemplo n.º 34
0
 def fit(self, X_train, labels_train, X_val, labels_val, learning_rate=1e-4, mu=0.9,
         decay=0.99, lambda_=1e-3, epochs=5, batch_sz=200, show_fig=False):
     K = len(set(labels_train))
     
     # Correct datatype
     X_train, X_val = X_train.astype(np.float32), X_val.astype(np.float32)
     Y_train, Y_val = y2indicator(labels_train).astype(np.float32), y2indicator(labels_val).astype(np.float32)
     
     # Initialize convpool layers
     N, width, height, c = X_train.shape
     mi = c
     outw = width
     outh = height
     self.convpool_layers = []
     for mo, fw, fh in self.convpool_layer_sizes:
         cp = ConvPoolLayer(mi, mo, fw, fh)
         self.convpool_layers.append(cp)
         outw = outw // 2
         outh = outh // 2
         mi = mo
         
     # Initialize hidden layers
     self.hidden_layers = []
     M1 = mi * outw * outh
     count = 0
     for M2 in self.hidden_layer_sizes:
         h = HiddenLayer(M1, M2, count)
         self.hidden_layers.append(h)
         M1 = M2
         count += 1
     
     # Initialize Output Layer
     W, b = init_weight_and_bias(M1, K)
     self.W = tf.Variable(W)
     self.b = tf.Variable(b)
     
     # Collect params for later use
     self.params = [self.W, self.b]
     for cp in self.convpool_layers:
         self.params += cp.params
     for h in self.hidden_layers:
         self.params += h.params
     
     # Set up tensorflow functions and variables
     tf_X = tf.placeholder(tf.float32, shape=(None, width, height, c), name='X')
     tf_Y = tf.placeholder(tf.float32, shape=(None, K), name='Y')
     logits = self.forward(tf_X)
     
     reg_cost = lambda_ * sum([tf.nn.l2_loss(p) for p in self.params])
     cost = tf.reduce_mean(
         tf.nn.softmax_cross_entropy_with_logits(
             logits=logits, 
             labels=tf_Y
         )
     ) + reg_cost
     train_op = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu).minimize(cost)
     
     predict_op = self.predict(tf_X)
     
     n_batches = N // batch_sz
     costs = []
     best_val_error = 1
     init = tf.global_variables_initializer()
     with tf.Session() as session:
         session.run(init)
         for i in range(epochs):
             X_train, Y_train = shuffle(X_train, Y_train)
             for j in range(n_batches):
                 Xbatch = X_train[j*batch_sz:(j*batch_sz+batch_sz)]
                 Ybatch = Y_train[j*batch_sz:(j*batch_sz+batch_sz)]
                 
                 session.run(train_op, feed_dict={tf_X: Xbatch, tf_Y: Ybatch})
                 
                 if j % 20 == 0:
                     c = session.run(cost, feed_dict={tf_X: X_val, tf_Y: Y_val})
                     costs.append(c)
                     
                     labels_val_pred = session.run(predict_op, feed_dict={tf_X: X_val})
                     e = error_rate(labels_val, labels_val_pred)
                     print("i:", i, "j:", j, '/', n_batches, "cost:", c, "error_rate:", e)
                     if e < best_val_error:
                         best_val_error = e    
         print("best_val_error:", best_val_error)
         
     if show_fig:
         plt.plot(costs)
         plt.show()
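A hypothetical way the fit method above might be driven; the CNN class name, its constructor arguments and get_data are assumptions, but the layer-size formats match how the method iterates over them:

# hypothetical usage; get_data() stands in for whatever loads N x width x height x channels images
X_train, labels_train, X_val, labels_val = get_data()
model = CNN(
    convpool_layer_sizes=[(20, 5, 5), (20, 5, 5)],  # (output feature maps, filter width, filter height)
    hidden_layer_sizes=[500, 300],
)
model.fit(X_train, labels_train, X_val, labels_val, epochs=5, batch_sz=200, show_fig=True)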
    def fit(self, X, Y, learning_rate=1e-3, mu=0.9, decay=0.9, reg=0, eps=1e-10, epochs=100, batch_sz=30, show_fig=False):
        learning_rate = np.float32(learning_rate)
        mu = np.float32(mu)
        decay = np.float32(decay)
        reg = np.float32(reg)
        eps = np.float32(eps)

        # make a validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = Y.astype(np.int32)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]

        # initialize hidden layers
        N, D = X.shape
        K = len(set(Y))
        self.hidden_layers = []
        M1 = D
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1
        W, b = init_weight_and_bias(M1, K)
        self.W = theano.shared(W, 'W_logreg')
        self.b = theano.shared(b, 'b_logreg')

        # collect params for later use
        self.params = [self.W, self.b]
        for h in self.hidden_layers:
            self.params += h.params

        # set up theano functions and variables
        thX = T.fmatrix('X')
        thY = T.ivector('Y')
        pY = self.th_forward(thX)

        rcost = reg*T.sum([(p*p).sum() for p in self.params])
        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
        prediction = self.th_predict(thX)

        # actual prediction function
        self.predict_op = theano.function(inputs=[thX], outputs=prediction)
        cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction])

        updates = rmsprop(cost, self.params, learning_rate, mu, decay, eps)
        train_op = theano.function(
            inputs=[thX, thY],
            updates=updates
        )

        n_batches = N // batch_sz
        costs = []
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

                train_op(Xbatch, Ybatch)

                if j % 20 == 0:
                    c, p = cost_predict_op(Xvalid, Yvalid)
                    costs.append(c)
                    e = error_rate(Yvalid, p)
                    print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e)
        
        if show_fig:
            plt.plot(costs)
            plt.show()
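Each listing builds its MLP out of HiddenLayer(M1, M2, an_id) objects that expose .params and a forward pass. A minimal Theano sketch reusing the init_weight_and_bias helper assumed throughout; the ReLU nonlinearity is an assumption (other listings use tanh):

import theano
import theano.tensor as T

class HiddenLayer(object):
    def __init__(self, M1, M2, an_id):
        # the id is only used to name the shared variables
        W, b = init_weight_and_bias(M1, M2)
        self.W = theano.shared(W, 'W_%s' % an_id)
        self.b = theano.shared(b, 'b_%s' % an_id)
        self.params = [self.W, self.b]

    def forward(self, X):
        return T.nnet.relu(X.dot(self.W) + self.b)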
Ejemplo n.º 36
0
    def fit(self, X, Y, lr=1e-6, mu=0.99, decay=0.999, reg=1e-11, eps=1e-9, epochs=300, batch_sz=100, show_fig=False):
        lr = np.float32(lr)
        mu = np.float32(mu)
        decay = np.float32(decay)
        reg = np.float32(reg)
        eps = np.float32(eps)

        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = Y.astype(np.int32)
        Xvalid = X[-1000:] # last 1000
        Yvalid = Y[-1000:] # last 1000
        X = X[:-1000] # all but the last 1000
        Y = Y[:-1000] # all but the last 1000

        N, D = X.shape
        K = len(set(Y))
        self.hidden_layers = []
        M1 = D
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1
        W, b = init_weight_and_bias(M1, K)
        self.W = theano.shared(W, 'W_logreg')
        self.b = theano.shared(b, 'b_logreg')

        self.params = [self.W, self.b]
        for h in self.hidden_layers:
            self.params += h.params

        dparams = [theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params]
        cache = [theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params]

        thX = T.fmatrix('X')
        thY = T.ivector('Y')
        pY = self.forward(thX)

        rcost = reg*T.sum([(p*p).sum() for p in self.params])
        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
        prediction = self.predict(thX)
        cost_predict_op = theano.function(inputs = [thX, thY], outputs = [cost, prediction])

        updates = [
            (c, decay*c + (np.float32(1)-decay)*T.grad(cost, p)*T.grad(cost, p)) for p, c in zip(self.params, cache)
        ] + [
            (p, p + mu*dp - lr*T.grad(cost, p)/T.sqrt(c+eps)) for p, c, dp in zip(self.params, cache, dparams)
        ] + [
            (dp, mu*dp - lr*T.grad(cost, p)/T.sqrt(c+eps)) for p, c, dp in zip(self.params, cache, dparams)
        ]

        train_op = theano.function(
            inputs = [thX, thY],
            updates=updates
        )

        n_batches = int(N / batch_sz)
        costs = []
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j*batch_sz:(j+1)*batch_sz]
                Ybatch = Y[j*batch_sz:(j+1)*batch_sz]

                train_op(Xbatch, Ybatch)

                if j % 20 == 0:
                    c, p = cost_predict_op(Xvalid, Yvalid)
                    costs.append(c)
                    e = error_rate(Yvalid, p)
                    print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e)

        if show_fig:
            plt.plot(costs)
            plt.show()
Ejemplo n.º 37
0
    def fit(self, X, Y, Xvalid, Yvalid, lr=1e-2, mu=0.9, reg=1e-3, decay=0.99999, eps=1e-10, batch_sz=30, epochs=5, show_fig=True):
        lr = np.float32(lr)
        mu = np.float32(mu)
        reg = np.float32(reg)
        decay = np.float32(decay)
        eps = np.float32(eps)
        K = len(set(Y))

        # make a validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = y2indicator(Y).astype(np.float32)

        Yvalid = y2indicator(Yvalid).astype(np.float32)
        Yvalid_flat = np.argmax(Yvalid, axis=1) # for calculating error rate

        # initialize convpool layers
        N, width, height, c = X.shape
        mi = c
        outw = width
        outh = height
        self.convpool_layers = []
        for mo, fw, fh in self.convpool_layer_sizes:
            layer = ConvPoolLayer(mi, mo, fw, fh)
            self.convpool_layers.append(layer)
            outw = outw // 2
            outh = outh // 2
            mi = mo

        # initialize mlp layers
        self.hidden_layers = []
        M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1

        # logistic regression layer
        W, b = init_weight_and_bias(M1, K)
        self.W = tf.Variable(W, name='W_logreg')
        self.b = tf.Variable(b, name='b_logreg')

        # collect params for later use
        self.params = [self.W, self.b]
        for h in self.convpool_layers:
            self.params += h.params
        for h in self.hidden_layers:
            self.params += h.params

        # set up tensorflow functions and variables
        tfX = tf.placeholder(tf.float32, shape=(None, width, height, c), name='X')
        tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y')
        act = self.forward(tfX)

        rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params])
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=act,
                labels=tfY
            )
        ) + rcost
        prediction = self.predict(tfX)

        train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost)

        n_batches = N // batch_sz
        costs = []
        init = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init)
            for i in range(epochs):
                X, Y = shuffle(X, Y)
                for j in range(n_batches):
                    Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                    Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

                    session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch})

                    if j % 20 == 0:
                        c = session.run(cost, feed_dict={tfX: Xvalid, tfY: Yvalid})
                        costs.append(c)

                        p = session.run(prediction, feed_dict={tfX: Xvalid, tfY: Yvalid})
                        e = error_rate(Yvalid_flat, p)
                        print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e)

        if show_fig:
            plt.plot(costs)
            plt.show()
Ejemplo n.º 38
0
    def fit(self,
            X,
            Y,
            learning_rate=1e-4,
            mu=0.9,
            decay=0.9,
            epochs=15,
            batch_sz=100,
            display_cost=False,
            save_params=False):
        # cast everything to np.float32 so the tf computation runs correctly
        learning_rate = np.float32(learning_rate)
        mu = np.float32(mu)
        decay = np.float32(decay)

        # create a validation set:
        X, Y = shuffle(X, Y)

        Xvalid, Yvalid = X[-1000:, ], Y[-1000:]
        X, Y = X[:-1000, ], Y[:-1000]

        # initialize hidden layers:
        N, D = X.shape
        K = len(set(Y))
        self.hidden_layers = []
        M1 = D
        count = 0
        # iterate through self.hidden_layer_sizes, threading each layer size through M1:
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1

        # the last_hidden_layer-output_layer weights and bias:
        W, b = init_weight_and_bias(M1, K)
        self.W = tf.Variable(W, name='W%s' % count)
        self.b = tf.Variable(b, name='b%s' % count)

        # collect all the network's parameters:
        self.params = [self.W, self.b]
        for h in self.hidden_layers:
            self.params += h.parameters

        # define tensorflow placeholders:
        tfX = tf.placeholder(tf.float32, shape=(None, D), name='X')
        tfT = tf.placeholder(tf.int32, shape=(None, ), name='T')

        # the logits output of the network:
        Y_logits = self.forward_train(tfX)

        # define the expression for cost:
        cost = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=Y_logits,
                                                           labels=tfT))

        # define the tensorflow train function:
        train_op = tf.train.RMSPropOptimizer(learning_rate,
                                             decay=decay,
                                             momentum=mu).minimize(cost)

        predict_op = self.predict(tfX)

        # validation cost will be calculated separately since nothing will be dropped
        Y_logits_valid = self.forward_predict(tfX)
        cost_valid = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=Y_logits_valid, labels=tfT))

        n_batches = N // batch_sz
        costs = []
        init = tf.global_variables_initializer()
        with tf.Session() as session:
            # initialize all tf variables:
            print('\nInitializing variables...')
            session.run(init)
            print('\nPerforming batch SGD with RMSProp and momentum...')
            for i in range(epochs):
                X, Y = shuffle(X, Y)
                for j in range(n_batches):
                    Xbatch = X[j * batch_sz:(j + 1) * batch_sz, :]
                    Ybatch = Y[j * batch_sz:(j + 1) * batch_sz]

                    session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch})
                    if j % 20 == 0:
                        c = session.run(cost_valid,
                                        feed_dict={
                                            tfX: Xvalid,
                                            tfT: Yvalid
                                        })
                        costs.append(c)
                        prediction = session.run(predict_op,
                                                 feed_dict={
                                                     tfX: Xvalid,
                                                     tfT: Yvalid
                                                 })
                        #print(prediction)
                        error = error_rate(Yvalid, prediction)
                        print('\ni: %d,  j: %d,  cost: %.6f,  error: %.6f' %
                              (i, j, c, error))

            # make the final prediction:
            prediction = session.run(predict_op, feed_dict={tfX: Xvalid})
            final_error = error_rate(Yvalid, prediction)

            if save_params:
                for h in self.hidden_layers:
                    p_type = 'W'
                    for p in h.parameters:
                        p = p.eval()
                        #print(type(p))
                        #print(p.shape)
                        name = p_type + str(h.id)
                        np.save(name, p)
                        p_type = 'b'
                # last hidden layer - output layer parameters:
                np.save('W%s' % count, self.W.eval())
                np.save('b%s' % count, self.b.eval())

        if display_cost:
            plt.plot(costs)
            plt.show()
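The last listing calls forward_train for training and forward_predict for validation because dropout is only applied while training. A sketch of that pair as class methods, assuming the keep-probabilities live in a hypothetical self.dropout_rates list and TF1's tf.nn.dropout(x, keep_prob) form:

    def forward_train(self, X):
        # apply dropout at the input and after every hidden layer
        Z = tf.nn.dropout(X, self.dropout_rates[0])
        for h, p_keep in zip(self.hidden_layers, self.dropout_rates[1:]):
            Z = h.forward(Z)
            Z = tf.nn.dropout(Z, p_keep)
        return tf.matmul(Z, self.W) + self.b

    def forward_predict(self, X):
        # no dropout at prediction time; tf.nn.dropout already rescales activations during training
        Z = X
        for h in self.hidden_layers:
            Z = h.forward(Z)
        return tf.matmul(Z, self.W) + self.b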