# ---- Code example #1 (score: 0) ----
def main():
    """Load MNIST, train a model, plot learning curves, and save test predictions.

    Side effects: writes 'loss_curve.png', 'accuracy_curve.png', and the
    CSV of test-set predictions via save_csv().
    """
    train_X, train_y = load_MNIST(name="train")
    val_X, val_y = load_MNIST(name="val")
    test_X = load_MNIST(name="test")

    # Convert integer class labels to one-hot vectors for the loss function.
    one_hot_train_y = one_hot_encoding(train_y)
    one_hot_val_y = one_hot_encoding(val_y)

    # 1. Build the model: input dim from the data, output dim = number of classes.
    model = Model(train_X.shape[1], one_hot_train_y.shape[1])
    model.build_model()

    # 2. Train on the training set, validating on the validation set.
    max_epochs = 150
    train_loss_list, train_acc_list, val_loss_list, val_acc_list = train(
        model,
        train_X,
        one_hot_train_y,
        val_X,
        one_hot_val_y,
        max_epochs=max_epochs,  # was a second hard-coded 150; use the one constant
        lr=0.015,
        batch_size=16)

    # Plot the loss curves (one point per epoch).
    plt.figure(1)
    plt.plot(train_loss_list, label='training')
    plt.plot(val_loss_list, label='validation')
    plt.ylabel('loss', fontsize=16)
    plt.xlabel('num.epochs', fontsize=16)
    plt.legend()
    plt.savefig('loss_curve.png')

    # Plot the accuracy curves.
    plt.figure(2)
    plt.plot(train_acc_list, label='training')
    plt.plot(val_acc_list, label='validation')
    plt.ylabel('accuracy', fontsize=16)
    plt.xlabel('num.epochs', fontsize=16)
    plt.legend()
    plt.savefig('accuracy_curve.png')
    plt.show()  # was `plt.show` (attribute access, never called) — no window appeared

    # 3. Run inference on the unlabeled test set and save the predictions.
    x = inference(model, test_X)
    save_csv(x)
# ---- Code example #2 (score: 0) ----
def inference(model, X, y=None, batch_size=16, metric_fn=accuracy_score, **kwargs):
	"""Run inference on the given dataset.

	With labels, also evaluates loss and accuracy; without labels, only
	class predictions are returned.

	Arguments:
		model {Model} -- The Neural Network model
		X {np.ndarray} -- The dataset input
		y {np.ndarray} -- The dataset labels (None for unlabeled test data)

	Keyword Arguments:
		batch_size {int} -- Mini-batch size for the dataloader (default: {16})
		metric_fn {function} -- Metric used to score predictions
			(default: {accuracy_score})

	Returns:
		np.ndarray -- (N, 1) predicted labels, when y is None
		tuple of (float, float, np.ndarray) -- (accuracy, total loss,
			predictions), when y is given
	"""
	if y is None:
		# Unlabeled data: keep the original order so predictions align with X.
		test_dataloader = Dataloader(X, y=None, batch_size=batch_size, shuffle=False)
		preds = []
		for inputs in test_dataloader:
			output = model(inputs)
			# argmax over class scores -> column vector of predicted labels
			preds.append(np.argmax(output, axis=1).reshape(-1, 1))
		return np.concatenate(preds, axis=0)

	# Labeled data: accumulate loss and the number of correct predictions.
	# shuffle=False so the returned predictions stay aligned with (X, y).
	val_dataloader = Dataloader(X, y, batch_size=batch_size, shuffle=False)
	val_loss, correct_val = 0.0, 0.0
	preds = []
	for inputs, labels in val_dataloader:
		labels_one_hot = one_hot_encoding(labels, num_class=10)
		output = model(inputs)
		# istraining=False: evaluate loss only, no training-side bookkeeping
		val_loss += model.bprop(output, labels_one_hot, istraining=False)

		pred = np.argmax(output, axis=1).reshape(-1, 1)
		preds.append(pred)
		# Use the actual batch length — the last batch may be short, and the
		# old ceil(acc * batch_size) with a mismatched batch_size miscounted.
		correct_val += np.round(metric_fn(pred, labels) * len(labels))
	val_acc = correct_val / len(val_dataloader.y)

	# Fixed: predictions were previously trimmed with `val_pred[1:,:]` inside
	# the loop, dropping one real row per batch after the first.
	return val_acc, val_loss, np.concatenate(preds, axis=0)
# ---- Code example #3 (score: 0) ----
def inference(model, X, y, batch_size=16, metric_fn=accuracy_score, **kwargs):
    """Run inference on a labeled dataset in a single full-batch pass.

    Arguments:
        model {Model} -- The Neural Network model
        X {np.ndarray} -- The dataset input
        y {np.ndarray} -- The dataset labels

    Keyword Arguments:
        batch_size {int} -- Unused; the whole dataset is evaluated at once
            (default: {16})
        metric_fn {function} -- Unused; accuracy comes from the loss layer's
            correct-prediction counter (default: {accuracy_score})

    Returns:
        tuple of (float, float, np.ndarray): accuracy, loss, and the
        predicted class index for every sample.
    """
    logits = model(X)
    labels = one_hot_encoding(y)
    # bprop returns the loss; as a side effect the loss layer records how
    # many predictions were correct (rightnum).
    loss = model.bprop(logits, labels)
    accuracy = model.loss_fn.rightnum / X.shape[0]
    # Removed an unreachable `raise NotImplementedError` after this return.
    return accuracy, loss, logits.argmax(axis=1)
# ---- Code example #4 (score: 0) ----
def train(model,
          train_X,
          train_y,
          val_X,
          val_y,
          max_epochs=20,
          lr=1e-3,
          batch_size=16,
          metric_fn=accuracy_score,
          **kwargs):
    """train Train the model

    Runs mini-batch gradient descent for `max_epochs` epochs, evaluating on
    the validation set after every epoch. Once validation accuracy exceeds
    95%, the learning rate is divided by 10 (one time only).

    Arguments:
        model {Model} -- The Model object
        train_X {np.ndarray} -- Training dataset
        train_y {np.ndarray} -- Training labels
        val_X {np.ndarray} -- Validation dataset
        val_y {np.ndarray} -- Validation labels

    Keyword Arguments:
        max_epochs {IntType or int} -- Maximum training epochs (default: {20})
        lr {FloatType or float} -- Learning rate (default: {1e-3})
        batch_size {IntType or int} -- Size of each mini batch (default: {16})
        metric_fn {function} -- Metric function to measure the performance of
            the model (default: {accuracy_score})

    Returns:
        tuple of four lists: per-epoch train loss, train accuracy,
        validation loss, validation accuracy.
    """
    history = {"t_loss": [], "t_acc": [], "v_loss": [], "v_acc": []}
    n_train, n_val = len(train_X), len(val_X)
    y_train_oh = one_hot_encoding(train_y)
    y_val_oh = one_hot_encoding(val_y)
    lr_not_yet_reduced = True

    for epoch in range(max_epochs):
        # Reshuffle the training set at the start of every epoch.
        order = np.arange(n_train)
        np.random.shuffle(order)
        shuffled_X, shuffled_y = train_X[order], y_train_oh[order]

        # --- training pass ---
        epoch_loss, epoch_correct = 0, 0
        for start in range(0, n_train, batch_size):
            stop = min(start + batch_size, n_train)
            logits = model(shuffled_X[start:stop])
            epoch_loss += model.bprop(logits, shuffled_y[start:stop])
            # the loss layer counts correct predictions as a side effect
            epoch_correct += model.loss_fn.rightnum
            model.update_parameters(lr)
        epoch_loss /= n_train
        epoch_acc = epoch_correct / n_train
        history["t_loss"].append(epoch_loss)
        history["t_acc"].append(epoch_acc)

        # --- validation pass (no parameter updates) ---
        v_loss, v_correct = 0, 0
        for start in range(0, n_val, batch_size):
            stop = min(start + batch_size, n_val)
            logits = model(val_X[start:stop])
            v_loss += model.bprop(logits, y_val_oh[start:stop])
            v_correct += model.loss_fn.rightnum
        v_loss /= n_val
        v_acc = v_correct / n_val
        history["v_loss"].append(v_loss)
        history["v_acc"].append(v_acc)

        # Shrink the learning rate once after crossing 95% validation accuracy.
        if v_acc > 0.95 and lr_not_yet_reduced:
            lr_not_yet_reduced = False
            lr /= 10
        print(
            "epoch: {}, train acc: {:.2f}%, train loss: {:.3f}, val acc: {:.2f}%, val loss: {:.3f}"
            .format(epoch + 1, epoch_acc * 100, epoch_loss, v_acc * 100,
                    v_loss))
    return history["t_loss"], history["t_acc"], history["v_loss"], history["v_acc"]
# ---- Code example #5 (score: 0) ----
def main():
	"""Train an MNIST classifier, plot learning curves, and save test predictions."""
	print('loading data #####')
	train_X, train_y = load_MNIST(path='dataset/', name="train")
	val_X, val_y = load_MNIST(path='dataset/', name="val")
	test_X = load_MNIST(path='dataset/', name="test")
	print('loading data complete #####')

	# 1. Build the model (784 input pixels, 10 output classes).
	batchSize = 32
	learningRate = 0.2
	model = Model(input_dim=784, output_dim=10)
	model.build_model()
	print('Model built #####')

	# 2. Train with validation. Validation stats are recorded every other
	# epoch, hence the 100-point x-axes on the validation curves below.
	model, train_acc, train_loss, val_acc, val_loss = train(
		model, train_X, train_y, val_X, val_y,
		max_epochs=200, lr=learningRate, batch_size=batchSize,
		metric_fn=accuracy_score)
	print('Training complete #####')

	# Plot of train and val accuracy vs iteration
	plt.figure(figsize=(10, 7))
	plt.ylabel('Accuracy')
	plt.xlabel('Number of iterations')
	plt.title('Accuracy vs number of iterations')
	plt.plot(np.linspace(0, 199, 200), train_acc, label='Train accuracy across iterations')
	plt.plot(np.linspace(0, 198, 100), val_acc, label='Val accuracy across iterations')
	plt.legend(loc='upper right')
	plt.show()

	# Plot of train and val loss vs iteration
	plt.figure(figsize=(10, 7))
	plt.ylabel('Loss')
	plt.xlabel('Number of iterations')
	plt.title('Loss vs number of iterations')
	plt.plot(np.linspace(0, 199, 200), train_loss, label='Train loss across iterations')
	plt.plot(np.linspace(0, 198, 100), val_loss, label='Val loss across iteration')
	plt.legend(loc='upper right')
	plt.show()

	# Evaluate on the validation set (labels given -> loss/accuracy returned).
	val_acc, val_loss, val_pred = inference(model, val_X, val_y, batch_size=batchSize)

	# 3. Inference on the unlabeled test set.
	# Fixed: this call previously passed `test_y=None`, which is not a
	# parameter of inference() and raised a TypeError; `y` defaults to None.
	test_pred = inference(model, test_X, batch_size=batchSize).reshape(-1, 1)
	print(test_pred.shape)
	save_csv(test_pred)

	print("Validation loss: {0}, Validation Acc: {1}%".format(val_loss, 100 * val_acc))
	if val_acc > 0.95:
		print("Your model is well-trained.")
	else:
		print("You still need to tune your model")
# ---- Code example #6 (score: 0) ----
def train(model,
		  train_X,
		  train_y,
		  val_X,
		  val_y,
		  max_epochs=20,
		  lr=1e-3,
		  batch_size=16,
		  metric_fn=accuracy_score,
		  **kwargs):
	"""train Train the model

	Mini-batch training with a decayed learning rate; validation metrics
	are computed every second epoch.

	Arguments:
		model {Model} -- The Model object
		train_X {np.ndarray} -- Training dataset
		train_y {np.ndarray} -- Training labels
		val_X {np.ndarray} -- Validation dataset
		val_y {np.ndarray} -- Validation labels

	Keyword Arguments:
		max_epochs {IntType or int} -- Maximum training epochs (default: {20})
		lr {FloatType or float} -- Learning rate (default: {1e-3})
		batch_size {IntType or int} -- Size of each mini batch (default: {16})
		metric_fn {function} -- Metric function to measure the performance of
			the model (default: {accuracy_score})

	Returns:
		tuple -- (model, train_acc, train_loss, val_acc, val_loss); the
		validation lists hold one entry per two epochs.
	"""
	# Fixed: the dataloaders previously hard-coded batch_size=32 while the
	# accuracy bookkeeping used the `batch_size` argument (default 16), so
	# the reported accuracy was wrong. Both now use the same value.
	train_dataloader = Dataloader(X=train_X, y=train_y, batch_size=batch_size, shuffle=True)
	val_dataloader = Dataloader(val_X, val_y, batch_size=batch_size, shuffle=True)

	train_acc, train_loss = [], []
	val_acc, val_loss = [], []
	for epoch in range(max_epochs):
		# Learning-rate schedule: decay the base lr as training progresses.
		lr_decay = decay_learning_rate(lr, epoch)

		train_loss_single, correct_train = 0, 0
		for inputs, labels in train_dataloader:
			labels_one_hot = one_hot_encoding(labels, num_class=10)
			output = model(inputs)
			train_loss_single += model.bprop(output, labels_one_hot)
			model.update_parameters(lr=lr_decay)

			pred = np.argmax(output, axis=1).reshape(-1, 1)
			# Count correct predictions with the actual batch length; the
			# last batch may be short, and ceil() inflated the count.
			correct_train += np.round(metric_fn(pred, labels) * len(labels))
		train_acc_single = correct_train / len(train_dataloader.y)
		train_acc.append(train_acc_single)
		train_loss.append(train_loss_single)
		print("Epoch: ", epoch + 1, "Training Accuracy:", 100 * train_acc_single, "%",
			  "Loss:", train_loss_single / len(train_dataloader.y), "Learning Rate:", lr_decay)

		# Validate every second epoch to save time.
		if (epoch + 1) % 2 == 0:
			val_loss_single, correct_val = 0, 0
			for inputs, labels in val_dataloader:
				labels_one_hot = one_hot_encoding(labels, num_class=10)
				output = model(inputs)
				# istraining=False: compute the loss without training updates
				val_loss_single += model.bprop(output, labels_one_hot, istraining=False)

				pred = np.argmax(output, axis=1).reshape(-1, 1)
				correct_val += np.round(metric_fn(pred, labels) * len(labels))
			val_acc_single = correct_val / len(val_dataloader.y)
			val_acc.append(val_acc_single)
			val_loss.append(val_loss_single)
			print("Epoch: ", epoch + 1, "Validation Accuracy:", 100 * val_acc_single, "%",
				  "Validation Loss:", val_loss_single / len(val_dataloader.y))
	return model, train_acc, train_loss, val_acc, val_loss
# ---- Code example #7 (score: 0) ----
def main():
    """Train an MNIST classifier epoch-by-epoch, plot curves, save test predictions.

    NOTE(review): this driver calls train() once PER BATCH with the batch's
    features/labels and adds its return value into a running loss, and reads
    model.accuracy as a side-effect counter — so the train()/inference()
    used here must follow that contract (confirm against their definitions;
    they differ from other train() variants in this file).
    """
    print('loading data #####')
    train_X, train_y = load_MNIST(path='dataset/', name="train")
    val_X, val_y = load_MNIST(path='dataset/', name="val")
    test_X = load_MNIST(path='dataset/', name="test")
    print('loading data complete #####')
    batchSize = 16
    learningRate = 0.2
    model = Model(input_dim=784, output_dim=10)
    model.build_model()
    print('Model built #####')
    # Per-epoch history lists for plotting.
    t_loss = []
    t_acc = []
    v_loss = []
    v_acc = []
    one_hot_train_y = one_hot_encoding(train_y)
    one_hot_val_y = one_hot_encoding(val_y)

    # 200 epochs over the training set; with batchSize = 16 that is
    # len(train_X) / 16 batches per epoch. The /50000 and /10000 divisors
    # below assume 50k train and 10k val samples — TODO confirm.
    # Start training
    for k in range(1, 201):

        print("\n\nEpoch", k)
        print("********")
        # Manual learning-rate step: drop lr by 10x at epoch 160.
        if (k == 160):
            learningRate = 0.02

        # Training

        # Fresh dataloader each epoch (presumably reshuffles — verify
        # Dataloader's default shuffle behavior).
        train_dataloader = Dataloader(X=train_X,
                                      y=one_hot_train_y,
                                      batch_size=batchSize)
        model.accuracy = 0  # reset the side-effect correct-count accumulator
        train_loss = 0
        # for each batch
        for i, (features, labels) in enumerate(train_dataloader):
            # One optimizer step on this batch; train() returns the batch loss.
            train_loss += train(model,
                                features,
                                labels,
                                max_epochs=200,
                                lr=learningRate,
                                batch_size=batchSize,
                                metric_fn=accuracy_score)
        t_acc.append(model.accuracy / 50000)
        t_loss.append(train_loss / 50000)
        print("Training Loss:", train_loss / 50000)
        print("Training Accuracy:", model.accuracy / 50000)

        # Validation — every second epoch only (so v_* have 100 entries).
        if (k % 2 == 0):
            model.accuracy = 0
            val_loss = 0
            val_dataloader = Dataloader(X=val_X,
                                        y=one_hot_val_y,
                                        batch_size=batchSize)
            for i, (features, labels) in enumerate(val_dataloader):
                # inference() returns the batch loss; accuracy accumulates
                # on model.accuracy as above.
                val_loss += inference(model,
                                      features,
                                      labels,
                                      batch_size=batchSize)

            v_acc.append(model.accuracy / 10000)
            v_loss.append(val_loss / 10000)
            print("Validation Loss:", val_loss / 10000)
            print("Validation Accuracy:", model.accuracy / 10000)

    print('Training complete #####')

    # Plot of train and val accuracy vs iteration.
    # Train curves have 200 points; val curves have 100 (every 2nd epoch).
    plt.figure(figsize=(10, 7))
    plt.ylabel('Accuracy')
    plt.xlabel('Number of iterations')
    plt.title('Accuracy vs number of iterations')
    plt.plot(np.linspace(0, 199, 200),
             t_acc,
             label='Train accuracy across iterations')
    plt.plot(np.linspace(0, 198, 100),
             v_acc,
             label='Val accuracy across iterations')
    plt.legend(loc='lower right')
    plt.show()

    # Plot of train and val loss vs iteration
    plt.figure(figsize=(10, 7))
    plt.ylabel('Loss')
    plt.xlabel('Number of iterations')
    plt.title('Loss vs number of iterations')
    plt.plot(np.linspace(0, 199, 200),
             t_loss,
             label='Train loss across iterations')
    plt.plot(np.linspace(0, 198, 100),
             v_loss,
             label='Val loss across iteration')
    plt.legend(loc='upper right')
    plt.show()

    # Inference on the unlabeled test dataset.
    # NOTE(review): predict() is defined elsewhere — confirm it returns
    # labels in the shape save_csv() expects.
    test_pred = predict(model, test_X)

    save_csv(test_pred)

    print("Validation loss: {0}, Validation Acc: {1}%".format(
        v_loss[-1], 100 * v_acc[-1]))