def fit(self,
            X,
            Y,
            lr=10e-5,
            mu=0.99,
            reg=10e-7,
            decay=0.99999,
            eps=10e-3,
            batch_sz=30,
            epochs=100,
            show_fig=True):
        lr = np.float32(lr)
        mu = np.float32(mu)
        reg = np.float32(reg)
        decay = np.float32(decay)
        eps = np.float32(eps)

        # make a validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = Y.astype(np.int32)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]

        # initialize convpool layers
        N, c, width, height = X.shape
        mi = c
        outw = width
        outh = height
        self.convpool_layers = []
        for mo, fw, fh in self.convpool_layer_sizes:
            layer = ConvPoolLayer(mi, mo, fw, fh)
            self.convpool_layers.append(layer)
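            # 'valid' convolution shrinks each spatial dim by (filter size - 1);
            # the // 2 below assumes ConvPoolLayer ends with a 2x2 max-pool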
            outw = (outw - fw + 1) // 2
            outh = (outh - fh + 1) // 2
            mi = mo

        # initialize mlp layers
        K = len(set(Y))
        self.hidden_layers = []
        M1 = self.convpool_layer_sizes[-1][0] * outw * outh  # size must be same as output of last convpool layer
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1

        # logistic regression layer
        W, b = init_weight_and_bias(M1, K)
        self.W = theano.shared(W, 'W_logreg')
        self.b = theano.shared(b, 'b_logreg')

        # collect params for later use
        self.params = [self.W, self.b]
        for c in self.convpool_layers:
            self.params += c.params
        for h in self.hidden_layers:
            self.params += h.params

        # for momentum
        dparams = [
            theano.shared(np.zeros(p.get_value().shape, dtype=np.float32))
            for p in self.params
        ]

        # for rmsprop
        cache = [
            theano.shared(np.zeros(p.get_value().shape, dtype=np.float32))
            for p in self.params
        ]

        # set up theano functions and variables
        thX = T.tensor4('X', dtype='float32')
        thY = T.ivector('Y')
        pY = self.forward(thX)

        rcost = reg * T.sum([(p * p).sum() for p in self.params])
        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
        prediction = self.th_predict(thX)

        cost_predict_op = theano.function(inputs=[thX, thY],
                                          outputs=[cost, prediction])

        # updates = [
        #     (c, decay*c + (np.float32(1)-decay)*T.grad(cost, p)*T.grad(cost, p)) for p, c in zip(self.params, cache)
        # ] + [
        #     (p, p + mu*dp - lr*T.grad(cost, p)/T.sqrt(c + eps)) for p, c, dp in zip(self.params, cache, dparams)
        # ] + [
        #     (dp, mu*dp - lr*T.grad(cost, p)/T.sqrt(c + eps)) for p, c, dp in zip(self.params, cache, dparams)
        # ]

        # momentum only
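        # velocity update: dp <- mu*dp - lr*grad; parameter update: p <- p + mu*dp - lr*grad,
        # which equals p plus the new velocity (Theano applies all updates simultaneously)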
        updates = [(p, p + mu * dp - lr * T.grad(cost, p))
                   for p, dp in zip(self.params, dparams)
                   ] + [(dp, mu * dp - lr * T.grad(cost, p))
                        for p, dp in zip(self.params, dparams)]

        train_op = theano.function(inputs=[thX, thY], updates=updates)

        n_batches = N // batch_sz
        costs = []
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]

                train_op(Xbatch, Ybatch)

                if j % 20 == 0:
                    c, p = cost_predict_op(Xvalid, Yvalid)
                    costs.append(c)
                    e = error_rate(Yvalid, p)
                    print("i:", i, "j:", j, "nb:", n_batches, "cost:", c,
                          "error rate:", e)

        if show_fig:
            plt.plot(costs)
            plt.show()
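A minimal usage sketch for this fit method. The class name CNN and its constructor arguments are assumptions inferred from the attributes referenced above (self.convpool_layer_sizes, self.hidden_layer_sizes), not part of the listing:

# hypothetical usage of the class that owns this fit() method
model = CNN(
    convpool_layer_sizes=[(20, 5, 5), (20, 5, 5)],  # (feature maps out, filter w, filter h)
    hidden_layer_sizes=[500, 300],
)
# X: float images shaped (N, channels, width, height); Y: integer labels 0..K-1
model.fit(X, Y, lr=1e-4, epochs=10, show_fig=False)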
Example #2
    def fit(self,
            X,
            Y,
            lr=10e-5,
            mu=0.99,
            reg=10e-7,
            decay=0.99999,
            eps=10e-3,
            batch_sz=30,
            epochs=100,
            show_fig=True):
        lr = np.float32(lr)
        mu = np.float32(mu)
        reg = np.float32(reg)
        decay = np.float32(decay)
        eps = np.float32(eps)

        # ============= Prep Data =============
        # Validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = Y.astype(np.int32)
        # Valid set - last 1000 entries
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        # Training set - Everything except last 1000 entries
        X, Y = X[:-1000], Y[:-1000]

        # ============= Prep ConvPool layers =============
        # initialize convpool layers
        N, c, width, height = X.shape
        mi = c
        outw = width
        outh = height
        self.convpool_layers = []
        # For each parameterised convpool layer
        conv_layer_count = 0
        for mo, fw, fh in self.convpool_layer_sizes:
            layer = ConvPoolLayer(mi, mo, fw, fh,
                                  self.pool_sz[conv_layer_count])
            # Add layer
            self.convpool_layers.append(layer)
            # Output width and height after this layer's convolution and pooling
            outw = (outw - fw + 1) // self.pool_sz[conv_layer_count][0]
            outh = (outh - fh + 1) // self.pool_sz[conv_layer_count][1]
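            # e.g. a 48x48 input, a 5x5 filter and a (2, 2) pool give
            # (48 - 5 + 1) // 2 = 22 along each spatial dimension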
            # Set feature input to previous feature output
            # for the next loop
            mi = mo
            conv_layer_count += 1
        # ============= Prep ANN layers =============
        # K = number of unique classes in Y
        K = len(set(Y))
        # list to store all the hidden layers
        self.hidden_layers = []
        # Flattened size of the last convpool layer's output; this becomes the
        # input dimension of the first fully connected (ANN) layer
        M1 = self.convpool_layer_sizes[-1][0] * outw * outh
        count = 0
        # Loop through the hidden layers in hidden_layer_sizes
        for M2 in self.hidden_layer_sizes:
            # Create hidden layer
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            # Set feature input to previous feature output
            # for the next loop
            M1 = M2
            count += 1
        # ============= Prep Log Regression layer =============
        W, b = init_weight_and_bias(M1, K)
        self.W = theano.shared(W, 'W_logreg')
        self.b = theano.shared(b, 'b_logreg')
        # ============= Collect parameters for SGD  =============
        self.params = [self.W, self.b]
        for c in self.convpool_layers:
            self.params += c.params
        for h in self.hidden_layers:
            self.params += h.params

        # momentum
        dparams = [
            theano.shared(np.zeros(p.get_value().shape, dtype=np.float32))
            for p in self.params
        ]
        # rmsprop
        cache = [
            theano.shared(np.zeros(p.get_value().shape, dtype=np.float32))
            for p in self.params
        ]
        # define theano variables - X and Y
        thX = T.tensor4('X', dtype='float32')
        thY = T.ivector('Y')
        # predicted class probabilities p(Y | X)
        pY = self.forward(thX)
        # regularisation cost
        # rcost = reg_parameter*sum(each_parameter^2)
        rcost = reg * T.sum([(p * p).sum() for p in self.params])
        # cost = -mean(log probability of the correct class) + regularisation cost
        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
        # prediction
        prediction = self.th_predict(thX)

        # function to calculate the prediction cost without updates
        # used to calculate cost of prediction for the validation set
        cost_predict_op = theano.function(inputs=[thX, thY],
                                          outputs=[cost, prediction])

        # momentum-only updates: each parameter p carries a velocity dp;
        # dp <- mu*dp - lr*grad and p <- p + mu*dp - lr*grad (i.e. p plus the updated velocity)
        updates = [(p, p + mu * dp - lr * T.grad(cost, p))
                   for p, dp in zip(self.params, dparams)
                   ] + [(dp, mu * dp - lr * T.grad(cost, p))
                        for p, dp in zip(self.params, dparams)]

        train_op = theano.function(inputs=[thX, thY], updates=updates)

        n_batches = N // batch_sz
        costs = []
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]

                train_op(Xbatch, Ybatch)

                if j % 20 == 0:
                    c, p = cost_predict_op(Xvalid, Yvalid)
                    costs.append(c)
                    e = error_rate(Yvalid, p)
                    print("i:", i, "j:", j, "nb:", n_batches, "cost:", c,
                          "error rate:", e)

        if show_fig:
            plt.plot(costs)
            plt.savefig("cost.png")
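Unlike the first example, this version reads a per-layer pool size from self.pool_sz. A minimal sketch of how the two lists might be paired at construction time; the class name and constructor signature are assumptions, not shown in the listing:

# hypothetical constructor call; pool_sz[i] belongs to convpool_layer_sizes[i]
model = CNN(
    convpool_layer_sizes=[(20, 5, 5), (40, 5, 5)],  # (maps out, filter w, filter h)
    pool_sz=[(2, 2), (2, 2)],                       # pool width/height per layer
    hidden_layer_sizes=[500, 300],
)
model.fit(X, Y, epochs=10, show_fig=False)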
Example #3
    def fit(self, X, Y, Xvalid, Yvalid, lr=1e-3, mu=0.99, reg=1e-3, decay=0.99999, eps=1e-10, batch_sz=30, epochs=3, show_fig=True):
        # downcast
        lr = np.float32(lr)
        mu = np.float32(mu)
        reg = np.float32(reg)
        decay = np.float32(decay)
        eps = np.float32(eps)

        X = X.astype(np.float32)
        Xvalid = Xvalid.astype(np.float32)
        Y = Y.astype(np.int32)
        Yvalid = Yvalid.astype(np.int32)

        # initialize convpool layers
        N, c, width, height = X.shape
        mi = c
        outw = width
        outh = height
        self.convpool_layers = []
        for mo, fw, fh in self.convpool_layer_sizes:
            layer = ConvPoolLayer(mi, mo, fw, fh)
            self.convpool_layers.append(layer)
            outw = (outw - fw + 1) // 2
            outh = (outh - fh + 1) // 2
            mi = mo

        # initialize mlp layers
        K = len(set(Y))
        self.hidden_layers = []
        M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1

        # logistic regression layer
        W, b = init_weight_and_bias(M1, K)
        self.W = theano.shared(W, 'W_logreg')
        self.b = theano.shared(b, 'b_logreg')

        # collect params for later use
        self.params = [self.W, self.b]
        for c in self.convpool_layers:
            self.params += c.params
        for h in self.hidden_layers:
            self.params += h.params

        # set up theano functions and variables
        thX = T.tensor4('X', dtype='float32')
        thY = T.ivector('Y')
        pY = self.forward(thX)

        rcost = reg*T.sum([(p*p).sum() for p in self.params])
        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
        prediction = self.th_predict(thX)

        cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction])

        updates = rmsprop(cost, self.params, lr, mu, decay, eps)
        train_op = theano.function(
            inputs=[thX, thY],
            outputs=cost,
            updates=updates
        )

        n_batches = N // batch_sz
        costs = []
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

                train_c = train_op(Xbatch, Ybatch)

                if j % 20 == 0:
                    c, p = cost_predict_op(Xvalid, Yvalid)
                    costs.append(c)
                    e = error_rate(Yvalid, p)
                    print(
                        "i:", i,
                        "j:", j,
                        "nb:", n_batches,
                        "train cost:", train_c,
                        "cost:", c,
                        "error rate:", e
                    )

        if show_fig:
            plt.plot(costs)
            plt.show()
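This example delegates the update rules to an rmsprop helper that is not shown in the listing. A plausible sketch of such a helper, combining an RMSprop cache with momentum in the same spirit as the commented-out updates in the first example (the original helper may differ):

import numpy as np
import theano
import theano.tensor as T

def rmsprop(cost, params, lr, mu, decay, eps):
    # one cache (running mean of squared gradients) and one velocity per parameter
    grads = T.grad(cost, params)
    updates = []
    for p, g in zip(params, grads):
        zeros = np.zeros(p.get_value().shape, dtype=np.float32)
        cache = theano.shared(zeros)
        velocity = theano.shared(zeros.copy())
        new_cache = decay * cache + (np.float32(1) - decay) * g * g
        new_velocity = mu * velocity - lr * g / T.sqrt(new_cache + eps)
        updates.append((cache, new_cache))
        updates.append((velocity, new_velocity))
        updates.append((p, p + new_velocity))
    return updates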
Example #4
    def fit(self,
            X,
            Y,
            lr=10e-5,
            mu=0.99,
            reg=10e-7,
            decay=0.99999,
            eps=10e-3,
            batch_sz=30,
            epochs=100,
            show_fig=True):
        # step 1: cast parameters to a suitable type and preprocess the input data
        lr = np.float32(lr)
        mu = np.float32(mu)
        reg = np.float32(reg)
        decay = np.float32(decay)
        eps = np.float32(eps)

        # make a validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = Y.astype(np.int32)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        Xtrain, Ytrain = X[:-1000], Y[:-1000]

        # step 2: initialize weights in convpool layers and mlp layers
        # convpool layers use 'valid' padding; initialize their filters here
        N, c, width, height = Xtrain.shape
        mi = c
        outw = width
        outh = height
        self.convpool_layers = []
        for mo, fw, fh in self.conv_pool_size:
            h = ConvpoolLayer(mi, mo, fw, fh)
            self.convpool_layers.append(h)
            outw = (outw - fw + 1) // 2
            outh = (outh - fh + 1) // 2
            mi = mo

        # mlp initialization
        K = len(set(Ytrain))
        M1 = self.conv_pool_size[-1][0] * outw * outh
        count = 0
        self.hidden_layers = []
        for M2 in self.hidden_layer_size:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            count += 1
            M1 = M2

        # the output layer
        W, b = weights_and_bias_init(M1, K)
        self.W = theano.shared(W, 'W_logreg')
        self.b = theano.shared(b, 'b_logreg')

        # collect all parameters matrix as a list
        self.params = [self.W, self.b]
        for h in self.convpool_layers:
            self.params += h.params
        for h in self.hidden_layers:
            self.params += h.params

        # step 3: Theano variables, cost, prediction, and update expressions
        # initialize accumulators for momentum and RMSprop
        dparams = [
            theano.shared(np.zeros(p.get_value().shape, dtype=np.float32))
            for p in self.params
        ]
        cache = [
            theano.shared(np.zeros(p.get_value().shape, dtype=np.float32))
            for p in self.params
        ]

        thX = T.tensor4('X', dtype='float32')
        thT = T.ivector('T')
        pY = self.th_forward(thX)

        rcost = reg * T.sum([(p * p).sum() for p in self.params])
        cost = -T.mean(T.log(pY[T.arange(thT.shape[0]), thT])) + rcost
        prediction = self.th_predict(thX)

        self.predict_op = theano.function(inputs=[thX], outputs=prediction)
        cost_predict_op = theano.function(inputs=[thX, thT],
                                          outputs=[cost, prediction])

        updates = [(p, p + mu * dp - lr * T.grad(cost, p))
                   for p, dp in zip(self.params, dparams)
                   ] + [(dp, mu * dp - lr * T.grad(cost, p))
                        for p, dp in zip(self.params, dparams)]
        train_op = theano.function(inputs=[thX, thT], updates=updates)

        n_batches = N // batch_sz
        costs = []
        for i in range(epochs):
            Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
            for j in range(n_batches):
                Xbatch = Xtrain[j * batch_sz:(j * batch_sz + batch_sz)]
                Ybatch = Ytrain[j * batch_sz:(j * batch_sz + batch_sz)]

                train_op(Xbatch, Ybatch)

                if j % 20 == 0:
                    c, p = cost_predict_op(Xvalid, Yvalid)
                    costs.append(c)
                    e = error_rate(Yvalid, p)
                    print("i:", i, "j:", j, "nb:", n_batches, "cost:", c,
                          "error rate:", e)

        if show_fig:
            plt.plot(costs)
            plt.show()
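All of these examples rely on small helpers that are defined elsewhere: init_weight_and_bias (called weights_and_bias_init in the last example), error_rate, and shuffle (presumably sklearn.utils.shuffle). Plausible minimal versions, assuming the conventional definitions; the originals are not shown here:

import numpy as np

def init_weight_and_bias(M1, M2):
    # scaled Gaussian weights and zero biases, cast to float32 for the Theano shared variables
    W = np.random.randn(M1, M2) / np.sqrt(M1 + M2)
    b = np.zeros(M2)
    return W.astype(np.float32), b.astype(np.float32)

def error_rate(targets, predictions):
    # fraction of samples whose predicted class differs from the target
    return np.mean(targets != predictions)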