def train(self, x, y):
    """Build the decision tree for this classifier from training data.

    Parameters
    ----------
    x : numpy.array
        An N by K numpy array (N is the number of instances, K is the
        number of attributes)
    y : numpy.array
        One entry per instance; its length must equal N.

    Returns
    -------
    DecisionTreeClassifier
        This classifier instance itself (``self``), with
        ``classifierTree`` populated and ``is_trained`` set to True.
    """
    # Refuse to train when the attribute rows and the targets disagree
    # on the number of instances.
    assert x.shape[0] == len(y), (
        "Training failed. x and y must have the same number of instances."
    )

    # Load the raw arrays into the project's dataset wrapper.
    training_set = ds.ClassifierDataset()
    training_set.initFromData(x, y)

    # The tree is constructed from the dataset, the split object obtained
    # for the root, and a fresh statistics object.
    root_split = training_set.getSplitObjectForRoot()
    stats = ClassifierTreeStats()
    self.classifierTree = ClassifierTree(training_set, root_split, stats)

    # Record that training has happened so other code can check for it.
    self.is_trained = True
    return self
import prune
import cProfile

# NOTE(review): `ds` and `cs` are used below but are not imported in the
# visible part of this file -- confirm they are imported above this point.

# How many times each dataset is profiled.
repeats = 3

# Locations of the available data files.
pathToSimple1 = './data/simple1.txt'
pathToSimple2 = './data/simple2.txt'
pathToTest = './data/test.txt'
pathToToy = './data/toy.txt'
pathToToy2 = './data/toy2.txt'
pathToFull = './data/train_full.txt'
pathToNoisy = './data/train_noisy.txt'
pathToSub = './data/train_sub.txt'
pathToValid = './data/validation.txt'

# Statement handed to cProfile, and the banner printed between sections.
profileStatement = 'dtc.train(dataset.attrib, dataset.labels)'
sectionBreak = "\n\n\n=====\n\n\n\n"

dataset = ds.ClassifierDataset()
dataset.initFromFile(pathToFull)
dtc = cs.DecisionTreeClassifier()

# Profile training on the full dataset; stats are printed sorted by 'time'.
print("FULL")
for _ in range(repeats):
    cProfile.run(profileStatement, None, 'time')

# Same measurement on the sub dataset.
dataset.initFromFile(pathToSub)
print(sectionBreak)
print("SUB")
for _ in range(repeats):
    cProfile.run(profileStatement, None, 'time')

# Switch to the noisy dataset for whatever follows.
dataset.initFromFile(pathToNoisy)
print(sectionBreak)
import matplotlib.pyplot as plt import numpy as np import torch from torch import optim import torch.nn.functional as F from model import BYOL, ClassifierBYOL, ClassifierScratch import dataset from tqdm import tqdm import glob DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu') BATCH_SIZE = 16 EPOCHS = 50 # %% train_data = torchvision.datasets.ImageFolder('data/label/train') train_dataset = dataset.ClassifierDataset(train_data) train_dataloader = DataLoader(train_dataset, shuffle=True, num_workers=NUM_WORKERS, batch_size=BATCH_SIZE) # %% def acc_fn(logits, targets): logits = torch.argmax(logits, dim=1).detach().cpu() targets = targets.detach().cpu() acc = torch.mean((logits == targets) * 1.0) return acc.numpy() def train_batch(batch, model, optimizer):
# Script: train a decision tree classifier on the noisy training file, then
# hand it to Prune together with the validation data.
import numpy as np
import dataset as ds
from prune import Prune
import classification as cs

# Locations of the available data files (only pathToNoisy and pathToValid
# are used in the visible part of this script).
pathToSimple1 = './data/simple1.txt'
pathToSimple2 = './data/simple2.txt'
pathToTest = './data/test.txt'
pathToToy = './data/toy.txt'
pathToToy2 = './data/toy2.txt'
pathToFull = './data/train_full.txt'
pathToNoisy = './data/train_noisy.txt'
pathToSub = './data/train_sub.txt'
pathToValid = './data/validation.txt'
pathToToyValid = "./data/toyvalid.txt"

# Load the noisy training file and train a classifier on it.
dataset = ds.ClassifierDataset()
dataset.initFromFile(pathToNoisy)
dtc = cs.DecisionTreeClassifier()
dtc.train(dataset.attrib, dataset.labels)

# Load the validation file into its own dataset object.
validationDataset = ds.ClassifierDataset()
validationDataset.initFromFile(pathToValid)

# NOTE(review): Prune is defined in the prune module, not shown here;
# presumably it prunes the trained tree against the validation data -- confirm.
Prune(dtc, validationDataset.attrib, validationDataset.labels)