Ejemplo n.º 1
0
    def fit(self,
            X,
            Y,
            lr=1e-3,
            mu=0.99,
            reg=1e-3,
            decay=0.99999,
            eps=1e-10,
            batch_sz=30,
            epochs=3,
            show_fig=True):
        """Build the conv-pool + dense graph and train it with RMSProp.

        The data is shuffled and its last 1000 samples are held out as a
        validation set. Every 20 mini-batches the validation cost and error
        rate are printed, and the cost is recorded for the optional plot.

        Side effects: sets ``self.convpool_layers``, ``self.hidden_layers``,
        ``self.W``, ``self.b`` and ``self.params``. Nothing is returned.

        NOTE(review): the ``tf.Session`` is created and closed inside this
        method, so the trained variable values do not outlive the call.

        Args:
            X: Input array whose shape unpacks as (N, width, height, channels).
            Y: One label per sample; converted with ``y2indicator``. The
                number of classes is taken as the count of distinct labels.
            lr: Learning rate given to ``RMSPropOptimizer``.
            mu: Momentum given to ``RMSPropOptimizer``.
            reg: Multiplier on the summed L2 loss of every parameter.
            decay: Decay given to ``RMSPropOptimizer``.
            eps: Unused by this method; kept so existing callers that pass it
                keep working.
            batch_sz: Mini-batch size. A trailing partial batch is skipped.
            epochs: Number of passes over the training split.
            show_fig: When true, plot the recorded validation costs.
        """
        lr = np.float32(lr)
        mu = np.float32(mu)
        reg = np.float32(reg)
        decay = np.float32(decay)
        K = len(set(Y))

        # Shuffle, then hold out the last 1000 samples for validation.
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = y2indicator(Y).astype(np.float32)

        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]
        Yvalid_flat = np.argmax(Yvalid, axis=1)  # class indices for error rate

        # Conv-pool layers. The tracked spatial size is halved per layer
        # (presumably 2x2 pooling inside ConvPoolLayer -- confirm there).
        N, width, height, channels = X.shape
        mi = channels
        outw = width
        outh = height
        self.convpool_layers = []
        for mo, fw, fh in self.convpool_layer_sizes:
            self.convpool_layers.append(ConvPoolLayer(mi, mo, fw, fh))
            outw = outw // 2
            outh = outh // 2
            mi = mo

        # Dense layers. The first input size must equal the flattened output
        # of the last conv-pool layer: feature maps * width * height.
        self.hidden_layers = []
        M1 = self.convpool_layer_sizes[-1][0] * outw * outh
        for layer_id, M2 in enumerate(self.hidden_layer_sizes):
            self.hidden_layers.append(HiddenLayer(M1, M2, layer_id))
            M1 = M2

        # Final logistic-regression layer. The name must be passed by
        # keyword: the second positional parameter of tf.Variable is
        # `trainable`, not `name`.
        W, b = init_weight_and_bias(M1, K)
        self.W = tf.Variable(W, name='W_logreg')
        self.b = tf.Variable(b, name='b_logreg')

        # Collect every trainable parameter for the L2 penalty.
        self.params = [self.W, self.b]
        for layer in self.convpool_layers:
            self.params += layer.params
        for layer in self.hidden_layers:
            self.params += layer.params

        # Graph inputs and outputs.
        tfX = tf.placeholder(tf.float32,
                             shape=(None, width, height, channels),
                             name='X')
        tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y')
        act = self.forward(tfX)

        rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params])
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=act,
                                                    labels=tfY)) + rcost
        prediction = self.predict(tfX)

        train_op = tf.train.RMSPropOptimizer(lr, decay=decay,
                                             momentum=mu).minimize(cost)

        n_batches = N // batch_sz
        costs = []
        init = tf.global_variables_initializer()
        valid_feed = {tfX: Xvalid, tfY: Yvalid}
        with tf.Session() as session:
            session.run(init)
            for i in range(epochs):
                X, Y = shuffle(X, Y)
                for j in range(n_batches):
                    start = j * batch_sz
                    Xbatch = X[start:start + batch_sz]
                    Ybatch = Y[start:start + batch_sz]

                    session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch})

                    if j % 20 == 0:
                        # Fetch cost and predictions in a single run so the
                        # validation set is only pushed through once.
                        valid_cost, p = session.run([cost, prediction],
                                                    feed_dict=valid_feed)
                        costs.append(valid_cost)
                        e = error_rate(Yvalid_flat, p)
                        print("i:", i, "j:", j, "nb:", n_batches, "cost:",
                              valid_cost, "error rate:", e)

        if show_fig:
            plt.plot(costs)
            plt.show()
Ejemplo n.º 2
0
    def fit(self,
            X,
            Y,
            lr=1e-3,
            mu=0.99,
            reg=10e-4,
            decay=0.99999,
            eps=10e-3,
            batch_sz=30,
            epochs=3,
            show_fig=True):
        """Build the conv-pool + dense graph and train it with Adam.

        The data is shuffled and its last 1000 samples are held out as a
        validation set. Every 20 mini-batches the validation cost and error
        rate are printed, and the cost is recorded for the optional plot.

        Side effects: sets ``self.convpool_layers``, ``self.hidden_layers``,
        ``self.W``, ``self.b`` and ``self.params``. Nothing is returned.

        NOTE(review): the ``tf.Session`` is created and closed inside this
        method, so the trained variable values do not outlive the call.

        Args:
            X: Input array whose shape unpacks as (N, width, height, channels).
            Y: One label per sample; converted with ``y2indicator``. The
                number of classes is taken as the count of distinct labels.
            lr: Learning rate given to ``AdamOptimizer``.
            mu: Unused (the optimizer is Adam, which takes no momentum
                argument here); kept for interface compatibility.
            reg: Multiplier on the summed L2 loss of every parameter.
            decay: Unused, for the same reason as ``mu``.
            eps: Unused; kept for interface compatibility.
            batch_sz: Mini-batch size. A trailing partial batch is skipped.
            epochs: Number of passes over the training split.
            show_fig: When true, plot the recorded validation costs.
        """
        lr = np.float32(lr)
        reg = np.float32(reg)
        K = len(set(Y))  # number of distinct labels in Y

        # Shuffle, then hold out the LAST 1000 samples for validation.
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = y2indicator(Y).astype(np.float32)

        Xvalid, Yvalid = X[-1000:], Y[-1000:]  # last 1000 samples
        X, Y = X[:-1000], Y[:-1000]  # everything before them
        Yvalid_flat = np.argmax(Yvalid, axis=1)  # class indices for error rate

        # Conv-pool layers. Width and height are tracked separately so that
        # non-square inputs work too.
        N, width, height, channels = X.shape
        mi = channels  # input feature maps of the first layer = channels
        outw = width
        outh = height
        self.convpool_layers = []
        for mo, fw, fh in self.convpool_layer_sizes:
            self.convpool_layers.append(ConvPoolLayer(mi, mo, fw, fh))
            # Halved because of the pooling layer. Floor division keeps the
            # sizes integral; true division would yield floats on Python 3.
            outw = outw // 2
            outh = outh // 2
            mi = mo

        # Dense layers. The first input size is the flattened output of the
        # last conv-pool layer: feature maps * width * height.
        self.hidden_layers = []
        M1 = self.convpool_layer_sizes[-1][0] * outw * outh
        for layer_id, M2 in enumerate(self.hidden_layer_sizes):
            self.hidden_layers.append(HiddenLayer(M1, M2, layer_id))
            M1 = M2

        # Final logistic-regression layer. The name must be passed by
        # keyword: the second positional parameter of tf.Variable is
        # `trainable`, not `name`.
        W, b = init_weight_and_bias(M1, K)
        self.W = tf.Variable(W, name='W_logreg')
        self.b = tf.Variable(b, name='b_logreg')

        # Collect every trainable parameter for the L2 penalty.
        self.params = [self.W, self.b]
        for layer in self.convpool_layers:
            self.params = self.params + layer.params
        for layer in self.hidden_layers:
            self.params = self.params + layer.params

        # Graph inputs and outputs.
        tfX = tf.placeholder(tf.float32, shape=(None, width, height, channels))
        tfY = tf.placeholder(tf.float32, shape=(None, K))
        act = self.forward(tfX)

        # Regularization cost.
        rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params])

        # Total cost: mean cross-entropy between the logits and the indicator
        # matrix of targets, plus the regularization term.
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=act,
                                                    labels=tfY)) + rcost

        prediction = self.predict(tfX)

        train_op = tf.train.AdamOptimizer(lr).minimize(cost)

        # Must be an int: it is passed to range() below.
        n_batches = N // batch_sz

        costs = []
        init = tf.global_variables_initializer()
        valid_feed = {tfX: Xvalid, tfY: Yvalid}

        with tf.Session() as session:
            session.run(init)

            for i in range(epochs):
                X, Y = shuffle(X, Y)
                for j in range(n_batches):
                    start = j * batch_sz
                    Xbatch = X[start:start + batch_sz]
                    Ybatch = Y[start:start + batch_sz]

                    session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch})

                    if j % 20 == 0:
                        # Fetch cost and predictions in a single run so the
                        # validation set is only pushed through once.
                        valid_cost, p = session.run([cost, prediction],
                                                    feed_dict=valid_feed)
                        costs.append(valid_cost)

                        e = error_rate(Yvalid_flat, p)
                        print('i', i, 'j', j, 'n_batches', n_batches, 'cost',
                              valid_cost, 'error_rate', e)

        if show_fig:
            plt.plot(costs)
            plt.show()