Example no. 1
	def __init__(self, **kwargs):
		"""Initialization.

		Args:
			debug (bool, optional): whether to run in debug mode
		"""
		self.debug = Debug("debug" in kwargs and kwargs["debug"])
		self.graph = []
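The Debug helper referenced here is not defined in these examples; only its constructor (a boolean flag) and its disp method are visible. A minimal sketch consistent with that usage, offered purely as an assumption, might be:

# Hypothetical sketch of the Debug helper used above; the real implementation
# is not shown in these examples.
class Debug:
	def __init__(self, enabled=False):
		self.enabled = bool(enabled)

	def disp(self, message):
		# Print only when debug mode is enabled; otherwise stay silent.
		if self.enabled:
			print(message)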
Example no. 2
def main():
	# set random seed
	np.random.seed(13141)

	# debug mode
	debug_mode = False
	dbg = Debug(debug_mode)

	# parse arguments
	parser = argparse.ArgumentParser(description='Train and test neural network on cifar dataset.')
	parser.add_argument('experiment_name', help='used for outputting log files')
	parser.add_argument('--num_hidden_units', type=int, help='number of hidden units')
	parser.add_argument('--learning_rate', type=float, help='learning rate for solver')
	parser.add_argument('--momentum_mu', type=float, help='mu for momentum solver')
	parser.add_argument('--mini_batch_size', type=int, help='mini batch size')
	parser.add_argument('--num_epoch', type=int, help='number of epochs')
	args = parser.parse_args()

	# experiment name
	experiment_name = args.experiment_name
	iter_log_file = "logs/{0}_iter_log.txt".format(experiment_name)
	epoch_log_file = "logs/{0}_epoch_log.txt".format(experiment_name)

	# load data
	print("Loading dataset...")
	timer.begin("dataset")
	DATASET_PATH = 'cifar-2class-py2/cifar_2class_py2.p'
	data = CifarDataset()
	data.load(DATASET_PATH)
	print("Loaded dataset in {0:2f}s.".format(timer.getElapsed("dataset")))

	# get data stats
	num_training = data.get_num_train()
	num_test = data.get_num_test()
	input_dim = data.get_data_dim()

	# hyperparameters
	num_hidden_units = 50 if args.num_hidden_units is None else args.num_hidden_units
	learning_rate = 0.01 if args.learning_rate is None else args.learning_rate
	momentum_mu = 0.6 if args.momentum_mu is None else args.momentum_mu
	mini_batch_size = 64 if args.mini_batch_size is None else args.mini_batch_size
	num_epoch = (500 if not debug_mode else 1) if args.num_epoch is None else args.num_epoch

	# print hyperparameters
	print("num_hidden_units: {0}".format(num_hidden_units))
	print("learning_rate: {0}".format(learning_rate))
	print("momentum_mu: {0}".format(momentum_mu))
	print("mini_batch_size: {0}".format(mini_batch_size))
	print("num_epoch: {0}".format(num_epoch))

	# network
	net = Sequential(debug=debug_mode)
	net.add( LinearLayer(input_dim, num_hidden_units) )
	net.add( ReluLayer() )
	net.add( LinearLayer(num_hidden_units, 2) )
	net.add( SoftMaxLayer() )

	print("{0}\n".format(net))

	# loss
	loss = CrossEntropyLoss()

	# error metrics
	training_objective = Objective(loss)
	test_objective = Objective(loss)
	errorRate = ErrorRate()

	print("Loss function: {0}\n".format(loss))

	# solver
	solver = MomentumSolver(lr=learning_rate, mu=momentum_mu)

	# training loop
	monitor = Monitor()
	monitor.createSession(iter_log_file, epoch_log_file)
	cum_iter = 0
	for epoch in range(num_epoch):
		print("Training epoch {0}...".format(epoch))
		timer.begin("epoch")
		# training
		for iter, batch in enumerate(data.get_train_batches(mini_batch_size)):
			if iter > 1 and debug_mode:
				break

			timer.begin("iter")

			# get batch
			(x, target) = batch
			batch_size = x.shape[2]

			# forward
			z = net.forward(x)
			dbg.disp("\toutput: {0}".format(z))
			dbg.disp("\toutput shape: {0}".format(z.shape))

			# loss
			if debug_mode:
				l = loss.forward(z, target)
				dbg.disp("\tloss: {0}".format(l))
				dbg.disp("\tloss shape: {0}".format(l.shape))

			# backward loss
			gradients = loss.backward(z, target)
			dbg.disp("\tgradients: {0}".format(gradients))
			dbg.disp("\tgradients shape: {0}".format(gradients.shape))

			# backward
			grad_x = net.backward(x, gradients)
			dbg.disp("\tgrad_x: {0}".format(grad_x))
			dbg.disp("\tgrad_x: {0}".format(grad_x.shape))

			# update parameters
			net.updateParams(solver)

			# metrics and timing
			loss_avg = training_objective.compute(z, target)
			elapsed = timer.getElapsed("iter")

			# logging
			print("\t[iter {0}]\tloss: {1}\telapsed: {2}".format(iter, loss_avg, elapsed))
			monitor.recordIteration(cum_iter, loss_avg, elapsed)

			cum_iter += 1

		# evaluation on test set
		target = data.get_test_labels()
		x = data.get_test_data()
		output = net.forward(x)
		loss_avg_test = test_objective.compute(output, target)
		error_rate_test = errorRate.compute(output, target)

		# evaluation on training set
		target = data.get_train_labels()
		x = data.get_train_data()
		output = net.forward(x)
		loss_avg_train = training_objective.compute(output, target)
		error_rate_train = errorRate.compute(output, target)

		# timing
		elapsed = timer.getElapsed("epoch")

		# logging
		print("End of epoch:\ttest objective: {0}\ttrain objective: {1}".format(loss_avg_test,
												 								loss_avg_train))
		print("\t\ttest error rate: {0}\ttrain error rate: {1}".format(error_rate_test,
												 					   error_rate_train))
		print("Finished epoch {1} in {0:2f}s.\n".format(elapsed, epoch))
		monitor.recordEpoch(epoch, loss_avg_train, loss_avg_test,
							error_rate_train, error_rate_test, elapsed)

	monitor.finishSession()
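main() also relies on a module-level timer with begin(name) and getElapsed(name); it is not defined anywhere in these examples. A small sketch matching those two calls (the dictionary-based implementation is an assumption) could be:

# Hypothetical timer module (timer.begin / timer.getElapsed) as used by main();
# only the two call sites above are known, the rest is assumed.
import time

_starts = {}

def begin(name):
	# Record the start time for a named stopwatch.
	_starts[name] = time.time()

def getElapsed(name):
	# Return the seconds elapsed since begin(name) was called.
	return time.time() - _starts[name]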
Example no. 3
    def __init__(self, **kwargs):

        self.debug = Debug("debug" in kwargs and kwargs["debug"])
        self.graph = []
Example no. 4
class Sequential:
    def __init__(self, **kwargs):

        self.debug = Debug("debug" in kwargs and kwargs["debug"])
        self.graph = []

    def size(self):

        return len(self.graph)

    def get(self, index):

        return self.graph[index]

    def add(self, layer):

        self.graph.append(layer)

        self.debug.disp("Added layer: [{0}] {1}".format(
            len(self.graph) - 1, layer))

    def remove(self, index=None):

        if index is None:
            layer = self.graph.pop()
        else:
            layer = self.graph.pop(index)

        self.debug.disp("Removed layer: {0}".format(layer))

    def insert(self, layer, index):

        self.graph.insert(index, layer)

        self.debug.disp("Inserted layer: [{0}] {1}".format(index, layer))

    def forward(self, x):

        self.debug.disp("[forward] Running forward pass...\n")
        self.debug.disp("[forward] Initial Input={0}\n".format(x))
        self.debug.disp("[forward] Initial Input Shape={0}\n".format(x.shape))

        z = x
        for index, layer in enumerate(self.graph):
            self.debug.disp("[forward] [{0}] {1}".format(index, layer))
            self.debug.disp("[forward] Input=\n\t\t{0}".format(z))
            self.debug.disp("[forward] Input Shape=\n\t\t{0}".format(z.shape))

            z = layer.forward(z)

            self.debug.disp("[forward] Output=\n\t\t{0}\n".format(z))
            self.debug.disp("[forward] Output Shape=\n\t\t{0}\n".format(
                z.shape))

        self.debug.disp("[forward] Final Output={0}\n".format(z))
        self.debug.disp("[forward] Final Output Shape={0}\n".format(z.shape))
        self.debug.disp("[forward] Done with forward pass.")

        return z

    def backward(self, x, grad):

        self.debug.disp("[backward] Running backward pass...\n")
        self.debug.disp("[backward] Initial Input x={0}\n".format(x))
        self.debug.disp("[backward] Initial Input x shape={0}\n".format(
            x.shape))
        self.debug.disp("[backward] Initial Input grad={0}\n".format(grad))
        self.debug.disp("[backward] Initial Input grad shape={0}\n".format(
            grad.shape))

        g = grad
        for index, layer in enumerate(reversed(self.graph)):
            self.debug.disp("[backward] [{0}] {1}".format(index, layer))
            self.debug.disp("[backward] Input=\n\t\t{0}".format(g))
            self.debug.disp("[backward] Input Shape=\n\t\t{0}".format(g.shape))

            g = layer.backward(g)

            self.debug.disp("[backward] Output=\n\t\t{0}\n".format(g))
            self.debug.disp("[backward] Output Shape=\n\t\t{0}\n".format(
                g.shape))

        self.debug.disp("[backward] Final Output={0}\n".format(g))
        self.debug.disp("[backward] Final Output Shape={0}\n".format(g.shape))
        self.debug.disp("[backward] Done with backward pass.")

        return g

    def updateParams(self, solver):
        self.debug.disp("[updateParams] Updating network params...\n")

        for index, layer in enumerate(reversed(self.graph)):
            self.debug.disp("[updateParams] [{0}] {1}".format(index, layer))
            layer.updateParams(solver)

        self.debug.disp("\n[updateParams] Done updating params.")

    def __str__(self):
        string = "Sequential Network: "
        for index, layer in enumerate(self.graph):
            string += "\n\t[{0}] {1}".format(index, layer)
        return string
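The layer classes themselves (LinearLayer, ReluLayer, SoftMaxLayer) are defined elsewhere. From the way Sequential drives them, a layer only needs forward(x), backward(grad), updateParams(solver), and __str__. A minimal no-op layer illustrating that assumed contract:

# Hypothetical no-op layer showing the interface Sequential appears to expect;
# it is not one of the layers used in the training examples.
class IdentityLayer:
    def forward(self, x):
        # Pass the input through unchanged.
        return x

    def backward(self, grad):
        # The gradient of the identity map is the incoming gradient.
        return grad

    def updateParams(self, solver):
        # No parameters to update.
        pass

    def __str__(self):
        return "IdentityLayer"

With such a layer, the container can be exercised end to end, e.g. net = Sequential(); net.add(IdentityLayer()); z = net.forward(x); net.backward(x, grad); net.updateParams(solver).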
Example no. 5
def MLP():
    np.random.seed(13141)

    debug_mode = False
    dbg = Debug(debug_mode)

    parser = argparse.ArgumentParser(
        description='Train and test neural network on cifar dataset.')
    parser.add_argument('experiment_name',
                        help='used for outputting log files')
    parser.add_argument('--num_hidden_units',
                        type=int,
                        help='number of hidden units')
    parser.add_argument('--learning_rate',
                        type=float,
                        help='learning rate for solver')
    parser.add_argument('--momentum_mu',
                        type=float,
                        help='mu for momentum solver')
    parser.add_argument('--mini_batch_size', type=int, help='mini batch size')
    parser.add_argument('--num_epoch', type=int, help='number of epochs')
    args = parser.parse_args()

    experiment_name = args.experiment_name
    iter_log_file = "logs/{0}_iter_log.txt".format(experiment_name)
    epoch_log_file = "logs/{0}_epoch_log.txt".format(experiment_name)
    print("Loading dataset...")
    timer.begin("dataset")
    DATASET_PATH = 'cifar-2class-py2/cifar_2class_py2.p'
    data = CifarDataset()
    data.load(DATASET_PATH)

    num_training = data.get_num_train()
    num_test = data.get_num_test()
    input_dim = data.get_data_dim()

    num_hidden_units = 50 if args.num_hidden_units is None else args.num_hidden_units
    learning_rate = 0.01 if args.learning_rate is None else args.learning_rate
    momentum_mu = 0.6 if args.momentum_mu is None else args.momentum_mu
    mini_batch_size = 64 if args.mini_batch_size is None else args.mini_batch_size
    num_epoch = (500 if not debug_mode else
                 1) if args.num_epoch is None else args.num_epoch

    print("num_hidden_units: {0}".format(num_hidden_units))
    print("learning_rate: {0}".format(learning_rate))
    print("momentum_mu: {0}".format(momentum_mu))
    print("mini_batch_size: {0}".format(mini_batch_size))
    print("num_epoch: {0}".format(num_epoch))

    net = Sequential(debug=debug_mode)
    net.add(LinearLayer(input_dim, num_hidden_units))
    net.add(ReluLayer())
    net.add(LinearLayer(num_hidden_units, 2))
    net.add(SoftMaxLayer())

    print("{0}\n".format(net))

    loss = CrossEntropyLoss()

    training_objective = Objective(loss)
    test_objective = Objective(loss)
    errorRate = ErrorRate()

    print("Loss function: {0}\n".format(loss))

    solver = MomentumSolver(lr=learning_rate, mu=momentum_mu)

    monitor = Monitor()
    monitor.createSession(iter_log_file, epoch_log_file)
    cum_iter = 0
    for epoch in range(num_epoch):
        print("Training epoch {0}...".format(epoch))
        timer.begin("epoch")
        for iter, batch in enumerate(data.get_train_batches(
                mini_batch_size)):  # batches are formed here
            if iter > 1 and debug_mode:
                break

            timer.begin("iter")

            (x, target) = batch
            batch_size = x.shape[2]

            z = net.forward(x)
            dbg.disp("\toutput: {0}".format(z))
            dbg.disp("\toutput shape: {0}".format(z.shape))

            if debug_mode:
                l = loss.forward(z, target)
                dbg.disp("\tloss: {0}".format(l))
                dbg.disp("\tloss shape: {0}".format(l.shape))

            gradients = loss.backward(z, target)
            dbg.disp("\tgradients: {0}".format(gradients))
            dbg.disp("\tgradients shape: {0}".format(gradients.shape))

            grad_x = net.backward(x, gradients)
            dbg.disp("\tgrad_x: {0}".format(grad_x))
            dbg.disp("\tgrad_x: {0}".format(grad_x.shape))

            net.updateParams(solver)

            loss_avg = training_objective.compute(z, target)
            elapsed = timer.getElapsed("iter")

            print("\t[iter {0}]\tloss: {1}\telapsed: {2}".format(
                iter, loss_avg, elapsed))
            monitor.recordIteration(cum_iter, loss_avg, elapsed)

            cum_iter += 1

        target = data.get_test_labels()
        x = data.get_test_data()
        output = net.forward(x)  #forward_layer
        loss_avg_test = test_objective.compute(output, target)
        error_rate_test = errorRate.compute(output, target)  # error rate = 100% - accuracy

        target = data.get_train_labels()
        x = data.get_train_data()
        output = net.forward(x)  #forward_layer
        loss_avg_train = training_objective.compute(output, target)
        error_rate_train = errorRate.compute(output, target)  # error rate = 100% - accuracy

        elapsed = timer.getElapsed("epoch")

        print(
            "End of epoch:\ttest objective: {0}\ttrain objective: {1}".format(
                loss_avg_test, loss_avg_train))
        print("\t\ttest error rate: {0}\ttrain error rate: {1}".format(
            error_rate_test, error_rate_train))
        print("Finished epoch {1} in {0:2f}s.\n".format(elapsed, epoch))
        monitor.recordEpoch(epoch, loss_avg_train, loss_avg_test,
                            error_rate_train, error_rate_test, elapsed)

    monitor.finishSession()
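MomentumSolver is constructed with lr and mu and handed to net.updateParams(solver), but its update interface never appears in these examples. A purely illustrative sketch of classical momentum (the update method name, its signature, and the per-parameter keying are assumptions):

# Hypothetical momentum solver; only the constructor arguments (lr, mu) are
# visible in the examples, the update interface below is assumed.
import numpy as np

class MomentumSolver:
    def __init__(self, lr=0.01, mu=0.9):
        self.lr = lr        # learning rate
        self.mu = mu        # momentum coefficient
        self.velocity = {}  # per-parameter velocity, keyed by the caller

    def update(self, param, grad, key):
        # Classical momentum: v <- mu * v - lr * grad; param <- param + v
        v = self.velocity.get(key, np.zeros_like(param))
        v = self.mu * v - self.lr * grad
        self.velocity[key] = v
        return param + v

A layer's updateParams(solver) would then do something like self.W = solver.update(self.W, self.grad_W, id(self)), though that call pattern is not confirmed by the examples.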
Example no. 6
class Sequential:
	"""Sequential is a type of network container that organizes
	modules in a sequential network.

	Assumes a consistent data format for all inputs and outputs:
		an m x n x b numpy array, where
			b is the batch dimension and
			m, n are arbitrary
	"""

	def __init__(self, **kwargs):
		"""Initialization.

		Args:
			debug (bool, optional): whether to run in debug mode
		"""
		self.debug = Debug("debug" in kwargs and kwargs["debug"])
		self.graph = []

	def size(self):
		"""Returns the number of layers in the network.

		Returns:
			number of layers
		"""
		return len(self.graph)

	def get(self, index):
		"""Get the layer at position index.

		Args:
			index: index to query

		Returns:
			layer object
		"""
		return self.graph[index]

	def add(self, layer):
		"""Add a layer to the end of the network.

		Args:
			layer: layer object
		"""
		self.graph.append(layer)

		self.debug.disp("Added layer: [{0}] {1}".format(len(self.graph)-1, layer))

	def remove(self, index=None):
		"""Remove a layer at the end of the network or at the specified index.

		Args:
			index: index to remove layer, otherwise remove last layer
		"""
		if index is None:
			layer = self.graph.pop()
		else:
			layer = self.graph.pop(index)

		self.debug.disp("Removed layer: {0}".format(layer))

	def insert(self, layer, index):
		"""Insert a layer at the specified index.

		Args:
			layer: layer object
			index: index to insert
		"""
		self.graph.insert(index, layer)

		self.debug.disp("Inserted layer: [{0}] {1}".format(index, layer))

	def forward(self, x):
		"""Performs a forward pass.

		Calls forward() on all layers in a forward sequence.
		This is basically inference.

		Args:
			x: input data

		Returns:
			result of forward pass (last layer)
		"""
		self.debug.disp("[forward] Running forward pass...\n")
		self.debug.disp("[forward] Initial Input={0}\n".format(x))
		self.debug.disp("[forward] Initial Input Shape={0}\n".format(x.shape))

		z = x
		for index, layer in enumerate(self.graph):
			self.debug.disp("[forward] [{0}] {1}".format(index, layer))
			self.debug.disp("[forward] Input=\n\t\t{0}".format(z))
			self.debug.disp("[forward] Input Shape=\n\t\t{0}".format(z.shape))

			z = layer.forward(z)

			self.debug.disp("[forward] Output=\n\t\t{0}\n".format(z))
			self.debug.disp("[forward] Output Shape=\n\t\t{0}\n".format(z.shape))

		self.debug.disp("[forward] Final Output={0}\n".format(z))
		self.debug.disp("[forward] Final Output Shape={0}\n".format(z.shape))
		self.debug.disp("[forward] Done with forward pass.")

		return z

	def backward(self, x, grad):
		"""Performs a backward pass.

		Calls backward() on all layers in a backward sequence.

		Does not update weights!
		This is basically computing gradients in advance for backpropagation.

		Also, forward() MUST BE CALLED BEFORE backward()!

		Args:
			x: input dataset
			grad: output gradient (computed from loss function)

		Returns:
			gradient w.r.t. the input
		"""
		self.debug.disp("[backward] Running backward pass...\n")
		self.debug.disp("[backward] Initial Input x={0}\n".format(x))
		self.debug.disp("[backward] Initial Input x shape={0}\n".format(x.shape))
		self.debug.disp("[backward] Initial Input grad={0}\n".format(grad))
		self.debug.disp("[backward] Initial Input grad shape={0}\n".format(grad.shape))

		g = grad
		for index, layer in enumerate(reversed(self.graph)):
			self.debug.disp("[backward] [{0}] {1}".format(index, layer))
			self.debug.disp("[backward] Input=\n\t\t{0}".format(g))
			self.debug.disp("[backward] Input Shape=\n\t\t{0}".format(g.shape))

			g = layer.backward(g)

			self.debug.disp("[backward] Output=\n\t\t{0}\n".format(g))
			self.debug.disp("[backward] Output Shape=\n\t\t{0}\n".format(g.shape))

		self.debug.disp("[backward] Final Output={0}\n".format(g))
		self.debug.disp("[backward] Final Output Shape={0}\n".format(g.shape))
		self.debug.disp("[backward] Done with backward pass.")

		return g

	def updateParams(self, solver):
		"""Update the parameters of the network.

		Backpropagation is basically backward() followed by updateParams().
		backward() MUST BE CALLED BEFORE updateParams()!

		Args:
			solver: solver object for updating weights
		"""
		self.debug.disp("[updateParams] Updating network params...\n")

		for index, layer in enumerate(reversed(self.graph)):
			self.debug.disp("[updateParams] [{0}] {1}".format(index, layer))
			layer.updateParams(solver)

		self.debug.disp("\n[updateParams] Done updating params.")

	def __str__(self):
		string = "Sequential Network: "
		for index, layer in enumerate(self.graph):
			string += "\n\t[{0}] {1}".format(index, layer)
		return string
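Finally, the metric helpers Objective and ErrorRate are used but not defined here. A hedged sketch consistent with the compute(output, target) calls, assuming the m x n x b output convention with class scores along the first axis and integer class labels as targets:

# Hypothetical metric helpers; the output layout and label encoding assumed
# below are not confirmed by the examples.
import numpy as np

class Objective:
	def __init__(self, loss):
		self.loss = loss

	def compute(self, output, target):
		# Average the loss over the batch.
		return np.mean(self.loss.forward(output, target))

class ErrorRate:
	def compute(self, output, target):
		# Fraction of samples whose arg-max class disagrees with the label,
		# assuming output has shape (num_classes, 1, batch) and target is
		# a length-batch vector of integer labels.
		predictions = np.argmax(output, axis=0).ravel()
		return float(np.mean(predictions != np.asarray(target).ravel()))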