import numpy as np
import matplotlib.pyplot as plt


def main():
    # define data dimensions
    Nclass = 500  # points per class
    D = 2  # input dimensionality
    M = 3  # hidden layer size
    K = 3  # number of classes

    # generate three gaussian clouds
    X1 = np.random.randn(Nclass, 2) + np.array([0, -2])
    X2 = np.random.randn(Nclass, 2) + np.array([2, 2])
    X3 = np.random.randn(Nclass, 2) + np.array([-2, 2])
    X = np.vstack([X1, X2, X3])

    Y = np.array([0] * Nclass + [1] * Nclass + [2] * Nclass)
    N = len(Y)
    # turn Y into an indicator matrix for training
    T = np.zeros((N, K))
    for i in range(N):
        T[i, Y[i]] = 1

    plt.scatter(X[:, 0], X[:, 1], c=Y, s=100, alpha=0.5)
    plt.show()

    W1, b1 = init_weight_and_biases(D, M)
    W2, b2 = init_weight_and_biases(M, K)

    # perform backpropagation
    learning_rate = 10e-7
    costs = []
    for epoch in range(100000):
        output, hidden = forward(X, W1, b1, W2, b2)

        if epoch % 100 == 0:
            c = cost(T, output)
            P = np.argmax(output, axis=1)
            r = classification_rate(Y, P)
            print("cost:", c, "classification rate:", r)
            costs.append(c)

        # gradient ascent on the log-likelihood (the reverse of gradient descent)
        W2 += learning_rate * derivative_w2(hidden, T, output)
        b2 += learning_rate * derivative_b2(T, output)

        W1 += learning_rate * derivative_w1(X, hidden, T, output, W2)
        b1 += learning_rate * derivative_b1(T, output, W2, hidden)

    plt.plot(costs)
    plt.show()
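
main() leans on several helpers defined elsewhere in the repository (init_weight_and_biases, forward, cost, classification_rate, and the derivative_* functions). A minimal sketch of what they could look like, assuming a tanh hidden layer and a softmax output; the activations and the initialization scale are assumptions, not taken from the snippet above:

import numpy as np


def init_weight_and_biases(M1, M2):
    # hypothetical implementation: small random weights, zero biases
    W = np.random.randn(M1, M2) / np.sqrt(M1 + M2)
    b = np.zeros(M2)
    return W, b


def forward(X, W1, b1, W2, b2):
    # tanh hidden layer, softmax output
    Z = np.tanh(X.dot(W1) + b1)
    A = Z.dot(W2) + b2
    expA = np.exp(A - A.max(axis=1, keepdims=True))
    return expA / expA.sum(axis=1, keepdims=True), Z


def cost(T, output):
    # log-likelihood, which the loop above maximizes by gradient ascent
    return (T * np.log(output)).sum()


def classification_rate(Y, P):
    return np.mean(Y == P)


def derivative_w2(hidden, T, output):
    return hidden.T.dot(T - output)


def derivative_b2(T, output):
    return (T - output).sum(axis=0)


def derivative_w1(X, hidden, T, output, W2):
    # backprop through the tanh hidden layer
    dZ = (T - output).dot(W2.T) * (1 - hidden * hidden)
    return X.T.dot(dZ)


def derivative_b1(T, output, W2, hidden):
    dZ = (T - output).dot(W2.T) * (1 - hidden * hidden)
    return dZ.sum(axis=0)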
Example #2
	def __init__(self, M1, M2, an_id):
		self.id = an_id
		self.M1 = M1
		self.M2 = M2
		W, b = init_weight_and_biases(M1, M2)
		self.W = tf.Variable(W.astype(np.float32))
		self.b = tf.Variable(b.astype(np.float32))
		self.params = [self.W, self.b]
Example #3
	def __init__(self, M1, M2, an_id):
		self.id = an_id
		self.M1 = M1
		self.M2 = M2
		W, b = init_weight_and_biases(M1, M2)
		self.W = theano.shared(W, 'W_%s' % self.id)
		self.b = theano.shared(b, 'b_%s' % self.id)
		self.params = [self.W, self.b]
Example #4
# X and Y are assumed to be loaded beforehand (e.g. X, Y = get_data() as in Example #6)
X, Y = shuffle(X, Y)
Y = Y.astype(np.int32)

M = 5
D = X.shape[1]
K = len(set(Y))

# create train and test sets
Xtrain = X[:-100]
Ytrain = Y[:-100]
Ytrain_ind = y2indicator(Ytrain)
Xtest = X[-100:]
Ytest = Y[-100:]
Ytest_ind = y2indicator(Ytest)

W1, b1 = init_weight_and_biases(D, M)
W2, b2 = init_weight_and_biases(M, K)


def forward(X, W1, b1, W2, b2):
    Z = np.tanh(X.dot(W1) + b1)
    return softmax(Z.dot(W2) + b2), Z


def cross_entropy(T, pY):
    return -np.mean(T * np.log(pY))


train_costs = []
test_costs = []
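
The excerpt ends right after the cost lists are created. A sketch of how the training loop might continue, assuming plain full-batch gradient descent on the cross-entropy with the tanh hidden layer used by forward() above; the learning rate and epoch count are placeholders:

learning_rate = 0.001
for i in range(10000):
    pYtrain, Ztrain = forward(Xtrain, W1, b1, W2, b2)
    pYtest, Ztest = forward(Xtest, W1, b1, W2, b2)

    ctrain = cross_entropy(Ytrain_ind, pYtrain)
    ctest = cross_entropy(Ytest_ind, pYtest)
    train_costs.append(ctrain)
    test_costs.append(ctest)

    # backprop through the output and the tanh hidden layer
    dOut = pYtrain - Ytrain_ind
    dZ = dOut.dot(W2.T) * (1 - Ztrain * Ztrain)
    W2 -= learning_rate * Ztrain.T.dot(dOut)
    b2 -= learning_rate * dOut.sum(axis=0)
    W1 -= learning_rate * Xtrain.T.dot(dZ)
    b1 -= learning_rate * dZ.sum(axis=0)

    if i % 1000 == 0:
        print(i, ctrain, ctest)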
Example #5
def main():
	Xtrain, Ytrain, Xtest, Ytest = MNISTData().loadFlatData()

	Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
	Xtest, Ytest = shuffle(Xtest, Ytest)

	Ytrain_ind = y2indicator(Ytrain)
	Ytest_ind = y2indicator(Ytest)


	max_iter = 20
	print_period = 10
	N, D = Xtrain.shape
	batch_sz = 500
	n_batches = N // batch_sz

	M1 = 1000
	M2 = 500
	K = 10
	W1_init, b1_init = init_weight_and_biases(D, M1)
	W2_init, b2_init = init_weight_and_biases(M1, M2)
	W3_init, b3_init = init_weight_and_biases(M2, K)

	# define tensorflow vars and expressions
	X = tf.placeholder(tf.float32, shape=[None, D], name='X')
	T = tf.placeholder(tf.float32, shape=[None, K], name='T')
	W1 = tf.Variable(W1_init.astype(np.float32))
	b1 = tf.Variable(b1_init.astype(np.float32))
	W2 = tf.Variable(W2_init.astype(np.float32))
	b2 = tf.Variable(b2_init.astype(np.float32))
	W3 = tf.Variable(W3_init.astype(np.float32))
	b3 = tf.Variable(b3_init.astype(np.float32))

	Z1 = tf.nn.relu(tf.matmul(X, W1) + b1)
	Z2 = tf.nn.relu(tf.matmul(Z1, W2) + b2)

	Yish = tf.matmul(Z2, W3) + b3
	cost = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=Yish, labels=T))

	train_op = tf.train.RMSPropOptimizer(0.0001, decay=0.99, momentum=0.9).minimize(cost)

	# used for error rate prediction
	predict_op = tf.argmax(Yish, 1)

	LL = []
	init = tf.global_variables_initializer()
	with tf.Session() as session:
		session.run(init)

		for i in range(max_iter):
			for j in range(n_batches):
				Xbatch = Xtrain[j * batch_sz:(j * batch_sz + batch_sz)]
				Ybatch = Ytrain_ind[j * batch_sz:(j * batch_sz + batch_sz)]

				session.run(train_op, feed_dict={X: Xbatch, T: Ybatch})
				if j % print_period == 0:
					test_cost = session.run(cost, feed_dict={X: Xtest, T: Ytest_ind})
					prediction = session.run(predict_op, feed_dict={X: Xtest, T: Ytest_ind})
					err = error_rate(prediction, Ytest)

					print("Cost / err at iteration i=%d, j=%d: %.3f / %.3f" % (i, j, test_cost, err))
					LL.append(test_cost)

	plt.plot(LL)
	plt.show()
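
This example (like Example #4 above) calls y2indicator and error_rate without showing them; plausible minimal versions, assumed rather than copied from the original source:

import numpy as np


def y2indicator(y):
    # one-hot encode integer class labels
    N = len(y)
    K = len(set(y))
    ind = np.zeros((N, K))
    ind[np.arange(N), y.astype(np.int32)] = 1
    return ind


def error_rate(p, t):
    # fraction of mismatched predictions
    return np.mean(p != t)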
Example #6
X, Y = get_data()
X, Y = shuffle(X, Y)
Y = Y.astype(np.int32)
D = X.shape[1]
K = len(set(Y))

# split into train and test sets
Xtrain = X[:-100]
Ytrain = Y[:-100]
Ytrain_ind = y2indicator(Ytrain)
Xtest = X[-100:]
Ytest = Y[-100:]
Ytest_ind = y2indicator(Ytest)

# initialize weights
W, b = init_weight_and_biases(D, K)


def forward(X, W, b):
    return softmax(X.dot(W) + b)


def cross_entropy(T, pY):
    return -np.mean(T * np.log(pY))


train_costs = []
test_costs = []

learning_rate = 0.001
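
As in Example #4, the listing stops just before the loop. A possible continuation under the same assumptions, i.e. full-batch gradient descent on the softmax cross-entropy (the epoch count is a placeholder):

for i in range(10000):
    pYtrain = forward(Xtrain, W, b)
    pYtest = forward(Xtest, W, b)

    ctrain = cross_entropy(Ytrain_ind, pYtrain)
    ctest = cross_entropy(Ytest_ind, pYtest)
    train_costs.append(ctrain)
    test_costs.append(ctest)

    # gradient descent for softmax regression
    W -= learning_rate * Xtrain.T.dot(pYtrain - Ytrain_ind)
    b -= learning_rate * (pYtrain - Ytrain_ind).sum(axis=0)

    if i % 1000 == 0:
        print(i, ctrain, ctest)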
Example #7
	def fit(self, X, Y, lr=10e-4, mu=0.99, reg=10e-4, decay=0.99999, eps=10e-3, batch_sz=30, epochs=3, show_fig=True):
		lr = np.float32(lr)
		mu = np.float32(mu)
		reg = np.float32(reg)
		decay = np.float32(decay)
		eps = np.float32(eps)
		K = len(set(Y))

		# make a validation set
		X, Y = shuffle(X, Y)
		X = X.astype(np.float32)
		Y = y2indicator(Y).astype(np.float32)

		Xvalid, Yvalid = X[-1000:], Y[-1000:]
		X, Y = X[:-1000], Y[:-1000]
		Yvalid_flat = np.argmax(Yvalid, axis=1)  # for calculating error rate

		# initialize convpool layers
		N, width, height, c = X.shape
		mi = c
		outw = width
		outh = height
		self.convpool_layers = []
		for mo, fw, fh in self.convpool_layer_sizes:
			layer = ConvPoolLayer(mi, mo, fw, fh)
			self.convpool_layers.append(layer)
			outw = outw // 2  # integer division so the flattened size below stays an int
			outh = outh // 2
			mi = mo

		# initialize mlp layers
		self.hidden_layers = []
		M1 = self.convpool_layer_sizes[-1][0] * outw * outh  # size must be same as output of last convpool layer
		count = 0
		for M2 in self.hidden_layer_sizes:
			h = HiddenLayer(M1, M2, count)
			self.hidden_layers.append(h)
			M1 = M2
			count += 1

		# logistic regression layer
		W, b = init_weight_and_biases(M1, K)
		self.W = tf.Variable(W.astype(np.float32), name='W_logreg')
		self.b = tf.Variable(b.astype(np.float32), name='b_logreg')

		# collect params for later use
		self.params = [self.W, self.b]
		for h in self.convpool_layers:
			self.params += h.params
		for h in self.hidden_layers:
			self.params += h.params

		# set up tensorflow functions and variables
		tfX = tf.placeholder(tf.float32, shape=(None, width, height, c), name='X')
		tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y')
		act = self.forward(tfX)

		rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params])
		cost = tf.reduce_mean(
			tf.nn.softmax_cross_entropy_with_logits(
				logits=act,
				labels=tfY
			)
		) + rcost
		prediction = self.predict(tfX)

		train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost)

		n_batches = N // batch_sz
		costs = []
		init = tf.global_variables_initializer()
		with tf.Session() as session:
			session.run(init)
			for i in range(epochs):
				X, Y = shuffle(X, Y)
				for j in range(n_batches):
					Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
					Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]

					session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch})

					if j % 20 == 0:
						c = session.run(cost, feed_dict={tfX: Xvalid, tfY: Yvalid})
						costs.append(c)

						p = session.run(prediction, feed_dict={tfX: Xvalid, tfY: Yvalid})
						e = error_rate(Yvalid_flat, p)
						print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e)

		if show_fig:
			plt.plot(costs)
			plt.show()
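
fit() calls self.forward and self.predict, which this excerpt omits. A rough sketch of how those methods might look for this TensorFlow 1.x model; the flattening step and the per-layer forward calls are assumptions about the surrounding class, not shown in the original:

	def forward(self, X):
		Z = X
		for c in self.convpool_layers:
			Z = c.forward(Z)
		# flatten the conv output before the fully connected layers
		Z_shape = Z.get_shape().as_list()
		Z = tf.reshape(Z, [-1, np.prod(Z_shape[1:])])
		for h in self.hidden_layers:
			Z = h.forward(Z)
		return tf.matmul(Z, self.W) + self.b

	def predict(self, X):
		pY = self.forward(X)
		return tf.argmax(pY, 1)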
Example #8
    def fit(self, X, Y, lr=10e-5, mu=0.99, reg=10e-7, decay=0.99999, eps=10e-3, batch_sz=30, epochs=100, show_fig=True):
        lr = np.float32(lr)
        mu = np.float32(mu)
        reg = np.float32(reg)
        decay = np.float32(decay)
        eps = np.float32(eps)

        # make a validation set
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = Y.astype(np.int32)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]

        # initialize convpool layers
        N, c, width, height = X.shape
        mi = c
        outw = width
        outh = height
        self.convpool_layers = []
        for mo, fw, fh in self.convpool_layer_sizes:
            layer = ConvPoolLayer(mi, mo, fw, fh)
            self.convpool_layers.append(layer)
            outw = (outw - fw + 1) // 2  # integer division keeps the flattened size an int
            outh = (outh - fh + 1) // 2
            mi = mo

        # initialize mlp layers
        K = len(set(Y))
        self.hidden_layers = []
        M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1

        # logistic regression layer
        W, b = init_weight_and_biases(M1, K)
        self.W = theano.shared(W, 'W_logreg')
        self.b = theano.shared(b, 'b_logreg')

        # collect params for later use
        self.params = [self.W, self.b]
        for layer in self.convpool_layers:
            self.params += layer.params
        for h in self.hidden_layers:
            self.params += h.params

        # for momentum
        dparams = [theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params]

        # for rmsprop
        cache = [theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params]

        # set up theano functions and variables
        thX = T.tensor4('X', dtype='float32')
        thY = T.ivector('Y')
        pY = self.forward(thX)

        rcost = reg*T.sum([(p*p).sum() for p in self.params])
        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
        prediction = self.th_predict(thX)

        cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction])

        # updates = [
        #     (c, decay*c + (np.float32(1)-decay)*T.grad(cost, p)*T.grad(cost, p)) for p, c in zip(self.params, cache)
        # ] + [
        #     (p, p + mu*dp - lr*T.grad(cost, p)/T.sqrt(c + eps)) for p, c, dp in zip(self.params, cache, dparams)
        # ] + [
        #     (dp, mu*dp - lr*T.grad(cost, p)/T.sqrt(c + eps)) for p, c, dp in zip(self.params, cache, dparams)
        # ]

        # momentum only
        updates = [
            (p, p + mu*dp - lr*T.grad(cost, p)) for p, dp in zip(self.params, dparams)
        ] + [
            (dp, mu*dp - lr*T.grad(cost, p)) for p, dp in zip(self.params, dparams)
        ]

        train_op = theano.function(
            inputs=[thX, thY],
            updates=updates
        )

        n_batches = N // batch_sz
        costs = []
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

                train_op(Xbatch, Ybatch)

                if j % 20 == 0:
                    c, p = cost_predict_op(Xvalid, Yvalid)
                    costs.append(c)
                    e = error_rate(Yvalid, p)
                    print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e)

        if show_fig:
            plt.plot(costs)
            plt.show()
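
Both CNN examples build ConvPoolLayer objects that are not shown here. A minimal Theano sketch consistent with the shape bookkeeping above ('valid' convolution followed by 2x2 max-pooling, so each spatial dimension becomes (dim - filter + 1) // 2); the initialization scale and the tanh activation are assumptions:

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv2d
from theano.tensor.signal.pool import pool_2d


class ConvPoolLayer(object):
    def __init__(self, mi, mo, fw, fh, poolsz=(2, 2)):
        # filter shape: (output feature maps, input feature maps, filter width, filter height)
        sz = (mo, mi, fw, fh)
        W0 = np.random.randn(*sz) * np.sqrt(2.0 / np.prod(sz[1:]))  # He-style scaling, one of many reasonable choices
        self.W = theano.shared(W0.astype(np.float32))
        self.b = theano.shared(np.zeros(mo, dtype=np.float32))
        self.poolsz = poolsz
        self.params = [self.W, self.b]

    def forward(self, X):
        conv_out = conv2d(input=X, filters=self.W)  # 'valid' border mode by default
        pooled = pool_2d(conv_out, ws=self.poolsz, ignore_border=True)  # older Theano uses ds= instead of ws=
        return T.tanh(pooled + self.b.dimshuffle('x', 0, 'x', 'x'))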