def loader(config, ctx):
    """
    Description: dataloader for the Omniglot dataset
    """
    N = config.N
    K = config.K
    iterations = config.iterations
    batch_size = config.batch_size
    download = config.download

    train_dataset = OmniglotDataset(mode='train', download=download)
    test_dataset = OmniglotDataset(mode='test', download=download)

    tr_sampler = BatchSampler(labels=train_dataset.y,
                              classes_per_it=N,
                              num_samples=K,
                              iterations=iterations,
                              batch_size=batch_size)
    te_sampler = BatchSampler(labels=test_dataset.y,
                              classes_per_it=N,
                              num_samples=K,
                              iterations=iterations,
                              batch_size=int(batch_size / len(ctx)))

    tr_dataloader = DataLoader(train_dataset, batch_sampler=tr_sampler)
    te_dataloader = DataLoader(test_dataset, batch_sampler=te_sampler)

    return tr_dataloader, te_dataloader
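A minimal sketch of how the two loaders returned by loader() might be consumed; config.epochs and the episode handling in the inner loop are assumptions, not part of the snippet above.

# Hedged usage sketch: config.epochs is an assumed field; each batch yielded by
# tr_dataloader is one episode of N classes with K samples per class.
tr_dataloader, te_dataloader = loader(config, ctx)
for epoch in range(config.epochs):
    for batch_x, batch_y in tr_dataloader:
        ...  # split the episode into support/query sets and update the model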
def init_dataset(opt):
    '''
    Initialize the datasets, samplers and dataloaders
    '''
    if opt.dataset == 'omniglot':
        train_dataset = OmniglotDataset(mode='train')
        val_dataset = OmniglotDataset(mode='val')
        trainval_dataset = OmniglotDataset(mode='trainval')
        test_dataset = OmniglotDataset(mode='test')
    elif opt.dataset == 'mini_imagenet':
        train_dataset = MiniImagenetDataset(mode='train')
        val_dataset = MiniImagenetDataset(mode='val')
        trainval_dataset = MiniImagenetDataset(mode='val')
        test_dataset = MiniImagenetDataset(mode='test')

    tr_sampler = BatchSampler(labels=train_dataset.y,
                              classes_per_it=opt.num_cls,
                              num_samples=opt.num_samples,
                              iterations=opt.iterations,
                              batch_size=opt.batch_size)
    val_sampler = BatchSampler(labels=val_dataset.y,
                               classes_per_it=opt.num_cls,
                               num_samples=opt.num_samples,
                               iterations=opt.iterations,
                               batch_size=opt.batch_size)
    trainval_sampler = BatchSampler(labels=trainval_dataset.y,
                                    classes_per_it=opt.num_cls,
                                    num_samples=opt.num_samples,
                                    iterations=opt.iterations,
                                    batch_size=opt.batch_size)
    test_sampler = BatchSampler(labels=test_dataset.y,
                                classes_per_it=opt.num_cls,
                                num_samples=opt.num_samples,
                                iterations=opt.iterations,
                                batch_size=opt.batch_size)

    tr_dataloader = torch.utils.data.DataLoader(train_dataset, batch_sampler=tr_sampler)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_sampler=val_sampler)
    trainval_dataloader = torch.utils.data.DataLoader(trainval_dataset, batch_sampler=trainval_sampler)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_sampler=test_sampler)

    return tr_dataloader, val_dataloader, trainval_dataloader, test_dataloader
class LinearRegression(Predictor):
    def __init__(self):
        self.weights = None
        self.bias = None
        self.batch_sampler = None

    def train(self, train_x, train_y, nb_epochs=1000, batch_size=1000, lr=0.1):
        self.weights = np.random.randn(1, train_x.shape[1])
        self.bias = np.zeros((1, train_x.shape[1]))
        self.batch_sampler = BatchSampler(batch_size)
        for epoch in range(nb_epochs):
            batch_x, batch_y = self.batch_sampler.sample(train_x, train_y)
            y_hat = self.predict(batch_x)
            cost = self.__compute_cost(y_hat, batch_y)
            grad_w, grad_b = self.__compute_grad(batch_x, y_hat, batch_y)
            self.weights = self.weights - lr * grad_w
            self.bias = self.bias - lr * grad_b
            print("Epoch: " + str(epoch))
            print("Cost: " + str(cost))
            print("Gradients (W, b): " + str(grad_w) + ", " + str(grad_b))
            print("Weights: " + str(self.weights) + ", " + str(self.bias))

    def predict(self, test_x):
        return self.weights * test_x + self.bias

    def __compute_cost(self, y_hat, y):
        # mean squared error; this is the cost whose gradient __compute_grad returns
        return np.mean((y_hat - y) ** 2)

    def __compute_grad(self, batch_x, y_hat, train_y):
        grad_w = 2 * np.mean((y_hat - train_y) * batch_x)
        grad_b = 2 * np.mean(y_hat - train_y)
        return grad_w, grad_b
def create_sampler(self, labels):
    if self.mode == 'train':
        classes_per_it = self.arg_settings.classes_per_it_tr
        num_samples = self.arg_settings.num_support_tr + self.arg_settings.num_query_tr
    else:
        classes_per_it = self.arg_settings.classes_per_it_val
        num_samples = self.arg_settings.num_support_val + self.arg_settings.num_query_val

    return BatchSampler(labels=labels,
                        classes_per_it=classes_per_it,
                        num_samples=num_samples,
                        iterations=self.arg_settings.iterations)
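For context, the sampler returned by create_sampler is normally handed to a PyTorch DataLoader through its batch_sampler argument, as in the other snippets in this collection; a minimal hedged sketch, where builder and dataset are hypothetical placeholders:

# Hedged sketch: `builder` is a hypothetical instance of the class that owns
# create_sampler, and `dataset` a dataset exposing the labels passed to it.
sampler = builder.create_sampler(labels=dataset.labels)
dataloader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler)
# each yielded batch holds classes_per_it * num_samples examples (one episode)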
def test_trainer_8(self):
    init_seed(options={"seed": 0})

    # learning rate scheduler step
    lr_scheduler_step = 15

    num_support_tr = 6
    num_query_tr = 12
    num_samples = num_support_tr + num_query_tr

    # number of random classes per episode for training;
    # this should be less than or equal to the number of
    # unique classes in the dataset
    classes_per_it = 3
    iterations = 10

    proto_net = ProtoNetTUF(encoder=linear(in_features=2, out_features=3))
    train_engine = TrainEngine(model=proto_net)

    # optimizer to be used for learning
    optimizer = optim.Adam(params=proto_net.parameters(), lr=0.1, weight_decay=0.001)

    # how to reduce the learning rate
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, gamma=0.01,
                                                   step_size=lr_scheduler_step, verbose=True)

    train_loader = TUFDataset(filename=Path("./test_data/train_data.csv"), dataset_type="train")

    sampler = BatchSampler(labels=train_loader.labels,
                           classes_per_it=classes_per_it,
                           num_samples=num_samples,
                           iterations=iterations,
                           mode="train")

    dataloader = torch.utils.data.DataLoader(train_loader, batch_sampler=sampler)

    options = {"optimizer": optimizer,
               "lr_scheduler": lr_scheduler,
               "max_epochs": 1,
               "device": "cpu",
               "sample_loader": dataloader,
               "iterations": iterations,
               "num_support_tr": num_support_tr}

    train_engine.train(options=options)
def test(configuration: dict) -> None:
    device = configuration['device']

    if device == 'gpu' and not torch.cuda.is_available():
        print("{0} You specified CUDA as device but PyTorch configuration does not support CUDA".format(WARNING))
        print("{0} Setting device to cpu".format(WARNING))
        configuration['device'] = 'cpu'

    # initialize seed for random generation utilities
    init_seed(options=configuration)

    test_model_path = Path(configuration["save_model_path"] + "/" +
                           configuration["model_name"] + "/" +
                           configuration["test_model"])

    model = ProtoNetTUF.build_network(encoder=linear_with_softmax(
        in_features=configuration["in_features"],
        out_features=len(configuration["classes"])),
        options=configuration)
    model.load_state_dict(torch.load(test_model_path))

    train_dataset = TUFDataset(filename=Path(configuration["test_dataset"]),
                               dataset_type="test",
                               classes=configuration["classes"])
    print(f"{INFO} Test dataset size {len(train_dataset)} ")

    # number of samples per episode
    # num_support_tr is the number of support points per class
    # num_query_tr is the number of query points per class
    num_samples = configuration["num_support_tr"] + configuration["num_query_tr"]

    sampler = BatchSampler(labels=train_dataset.labels,
                           classes_per_it=len(configuration["classes"]),
                           num_samples=num_samples,
                           iterations=configuration["iterations"],
                           mode="train")

    dataloader = torch.utils.data.DataLoader(train_dataset, batch_sampler=sampler)
def train(configuration: dict) -> None:
    dirs = os.listdir(configuration["save_model_path"])

    if configuration["model_name"] in dirs:
        raise ValueError(f"Directory {configuration['model_name']} exists")

    # create the output directory if it doesn't exist
    output_path = Path(configuration["save_model_path"] + "/" + configuration["model_name"])
    os.mkdir(path=output_path)
    configuration["save_model_path"] = str(output_path)

    # save the configuration in the output directory
    with open(output_path / "config.json", 'w', newline="\n") as fh:
        json.dump(configuration, fh)

    device = configuration['device']

    if device == 'gpu' and not torch.cuda.is_available():
        print("{0} You specified CUDA as device but PyTorch configuration does not support CUDA".format(WARNING))
        print("{0} Setting device to cpu".format(WARNING))
        configuration['device'] = 'cpu'

    # initialize seed for random generation utilities
    init_seed(options=configuration)

    # the model to train
    model = ProtoNetTUF.build_network(encoder=convolution_with_linear_softmax(
        in_channels=2, out_channels=1, kernel_size=1,
        in_features=configuration["in_features"],
        out_features=len(configuration["classes"])),
        options=configuration)

    # initialize the optimizer
    optim = torch.optim.Adam(params=model.parameters(),
                             lr=configuration["optimizer"]["lr"],
                             weight_decay=configuration["optimizer"]["weight_decay"])

    # initialize scheduler for learning rate decay.
    # Decays the learning rate of each parameter group by gamma every step_size epochs.
    # Notice that such decay can happen simultaneously with other changes
    # to the learning rate from outside this scheduler.
    # When last_epoch=-1, sets initial lr as lr.
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optim,
                                                   gamma=configuration["lr_scheduler"]["gamma"],
                                                   step_size=configuration["lr_scheduler"]["step_size"])

    train_dataset = TUFDataset(filename=Path(configuration["train_dataset"]),
                               dataset_type="train",
                               classes=configuration["classes"])
    print(f"{INFO} Training dataset size {len(train_dataset)} ")

    # number of samples per episode
    # num_support_tr is the number of support points per class
    # num_query_tr is the number of query points per class
    num_samples = configuration["num_support_tr"] + configuration["num_query_tr"]

    sampler = BatchSampler(labels=train_dataset.labels,
                           classes_per_it=len(configuration["classes"]),
                           num_samples=num_samples,
                           iterations=configuration["iterations"],
                           mode="train")

    dataloader = torch.utils.data.DataLoader(train_dataset, batch_sampler=sampler)

    # options for the training engine
    options = TrainEngine.build_options(optimizer=optim,
                                        lr_scheduler=lr_scheduler,
                                        max_epochs=configuration["max_epochs"],
                                        iterations=configuration["iterations"],
                                        device=configuration["device"],
                                        sample_loader=dataloader,
                                        num_support_tr=configuration["num_support_tr"])

    options = extend_options_from_config(configuration=configuration, options=options)

    if configuration["validate"]:
        num_support_validation = configuration["num_support_validation"]
        num_query_validation = configuration["num_query_validation"]
        num_samples_validation = num_query_validation + num_support_validation
        print(f"{INFO} Number of samples validation {num_samples_validation}")

        validation_dataset = TUFDataset(filename=Path(configuration["validate_dataset"]),
                                        dataset_type="validate",
                                        classes=configuration["classes"])
        print(f"{INFO} Validation dataset size {len(validation_dataset)} ")

        val_sampler = BatchSampler(labels=validation_dataset.labels,
                                   classes_per_it=len(configuration["classes"]),
                                   num_samples=num_samples_validation,
                                   iterations=configuration["iterations"],
                                   mode="validate")

        validation_dataloader = torch.utils.data.DataLoader(validation_dataset,
                                                            batch_sampler=val_sampler)

        options["validation_dataloader"] = validation_dataloader
        options["num_support_validation"] = configuration["num_support_validation"]

    # train the model
    engine = TrainEngine(model=model)
    engine.train(options=options)

    engine_state = engine.state

    x = [epoch for epoch in range(configuration["max_epochs"])]
    train_loss = engine_state["average_train_loss"]
    validation_loss = engine_state["average_validation_loss"]

    plt.plot(x, train_loss, 'r*', label="Train loss")
    plt.plot(x, validation_loss, 'bo', label="Validation loss")
    plt.xlabel("Epoch")
    plt.ylabel("Average Loss")
    plt.legend(loc="upper right")
    plt.title(r"Train vs Validation loss. $\eta=${0}, Iterations/epoch {1}".format(
        configuration["optimizer"]["lr"], configuration["iterations"]))
    plt.savefig(Path(configuration["save_model_path"] + "/" + "train_validation_loss.png"))
    plt.close()

    train_acc = engine_state["average_train_acc"]
    validation_acc = engine_state["average_validation_acc"]

    plt.plot(x, train_acc, 'r*', label="Train accuracy")
    plt.plot(x, validation_acc, 'bo', label="Validation accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Average Accuracy")
    plt.legend(loc="upper right")
    plt.title(r"Train vs Validation accuracy. $\eta=${0}, Iterations/epoch {1}".format(
        configuration["optimizer"]["lr"], configuration["iterations"]))
    plt.savefig(Path(configuration["save_model_path"] + "/" + "train_validation_accuracy.png"))
                            list_file=label_path,
                            lmdb_path=lmdb_path,
                            train=True,
                            transform=[transforms.ToTensor()],
                            multi_scale=multi_scale)

    if not multi_scale:
        train_loader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  pin_memory=True,
                                  num_workers=12)
    else:
        train_loader = DataLoader(train_dataset,
                                  batch_sampler=BatchSampler(RandomSampler(train_dataset),
                                                             batch_size,
                                                             True,
                                                             multiscale_step=1,
                                                             img_sizes=list(range(640, 1024 + 1, 128))),
                                  pin_memory=True,
                                  num_workers=12)

    print('the dataset has %d images' % (len(train_dataset)))
    print('the batch_size is %d' % (batch_size))

    num_iter = 0
    # vis = visdom.Visdom()
    # win = vis.line(Y=np.array([0]), X=np.array([0]))
    # win_lr = vis.line(Y=np.array([learning_rate]), X=np.array([0]))

    net.train()

    loss_file = './total_loss.txt'
    f_write = open(loss_file, 'w')
def train():
    ## setup
    torch.multiprocessing.set_sharing_strategy('file_system')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    ## model and loss
    logger.info('setting up backbone model and loss')
    if model == 'vggm':
        logger.info('model select vggm')
        net = vggm(num_class=args.num_class).cuda()
    else:
        logger.info('model select resnet50')
        net = EmbedNetwork(num_class=args.num_class).cuda()
    if args.resume != '':
        net.load_state_dict(torch.load(args.resume))
        logger.info('fine-tune from {}'.format(args.resume))
    net = nn.DataParallel(net)
    triplet_loss = TripletLoss(margin=None).cuda()  # no margin means soft-margin
    softmax_criterion = torch.nn.CrossEntropyLoss()

    ## optimizer
    logger.info('creating optimizer')
    optim = AdamOptimWrapper(net.parameters(), lr=args.learning_rate, wd=0,
                             t0=args.decay_start_iteration, t1=args.train_iterations)

    ## dataloader
    selector = BatchHardTripletSelector()
    ds = VehicleID(img_dir, img_list, img_size=256, is_train=True)
    logger.info('dataset OK')
    sampler = BatchSampler(ds, args.batch_p, args.batch_k)
    dl = DataLoader(ds, batch_sampler=sampler, num_workers=4)
    diter = iter(dl)

    ## train
    logger.info('start training ...')
    loss_avg = []
    loss_soft_avg = []
    count = 0
    t_start = time.time()
    while True:
        try:
            imgs, lbs, _, _ = next(diter)
        except StopIteration:
            diter = iter(dl)
            imgs, lbs, _, _ = next(diter)

        net.train()
        imgs = imgs.cuda()
        lbs = lbs.cuda()

        # both vggm and resnet50 return (embeddings, fc logits)
        embds, fc = net(imgs)
        anchor, positives, negatives = selector(embds, lbs)
        loss = triplet_loss(anchor, positives, negatives)
        loss_softmax = softmax_criterion(fc, lbs)
        loss_all = 0.5 * loss_softmax + loss

        optim.zero_grad()
        loss_all.backward()
        optim.step()

        loss_avg.append(loss.detach().cpu().numpy())
        loss_soft_avg.append(loss_softmax.detach().cpu().numpy())
        if count % 20 == 0 and count != 0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            loss_soft_avg = sum(loss_soft_avg) / len(loss_soft_avg)
            t_end = time.time()
            time_interval = t_end - t_start
            logger.info('iter: {}, trip_loss: {:4f}, soft_loss: {:4f}, lr: {:4f}, time: {:3f}'.format(
                count, loss_avg, loss_soft_avg, optim.lr, time_interval))
            loss_avg = []
            loss_soft_avg = []
            t_start = t_end

        if count % args.checkpoint_frequency == 0 and count != 0:
            logger.info('saving trained model')
            name = save_path + str(count) + model_name
            ver = 2
            while os.path.exists(name):
                logger.info('model already exists')
                name = name + '_v' + str(ver)
                ver = ver + 1
            torch.save(net.module.state_dict(), name)

        count += 1
        if count == args.train_iterations:
            break

    ## dump model
    logger.info('saving trained model')
    name = save_path + str(count) + '_' + model_name
    ver = 2
    while os.path.exists(name):
        logger.info('model already exists')
        name = name + '_v' + str(ver)
        ver = ver + 1
    torch.save(net.module.state_dict(), name)
    logger.info('everything finished')
def train():
    ## setup
    torch.multiprocessing.set_sharing_strategy('file_system')
    if not os.path.exists('./res'):
        os.makedirs('./res')

    ## model and loss
    logger.info('setting up backbone model and loss')
    net = EmbedNetwork().cuda()
    net = nn.DataParallel(net)
    print(net)
    triplet_loss = TripletLoss(margin=0.3).cuda()  # margin=None would mean soft-margin
    BNNeck = ClassBlock(2048, 1501).cuda()
    BNNeck = nn.DataParallel(BNNeck)

    # classification losses and classifier head, constructed once outside the training loop
    criterion = CrossEntropyLabelSmooth(num_classes=1501)
    center_criterion = CenterLoss(num_classes=1501, feat_dim=2048, use_gpu=True)
    classifier = nn.Sequential(nn.Linear(2048, 1501))
    classifier.apply(weights_init_classifier)
    classifier = classifier.cuda()

    ## optimizer
    logger.info('creating optimizer')
    optim = AdamOptimWrapper(net.parameters(), lr=3e-4, wd=0, t0=15000, t1=25000)

    ## dataloader
    selector = BatchHardTripletSelector()
    ds = Market1501(
        'E://graduation thesis//triplet-reid-pytorch-master//triplet-reid-pytorch-master//datasets//Market-1501-v15.09.15//Market-1501-v15.09.15//bounding_box_train',
        is_train=True)
    sampler = BatchSampler(ds, 9, 4)
    dl = DataLoader(ds, batch_sampler=sampler, num_workers=0)
    diter = iter(dl)

    ## train
    logger.info('start training ...')
    loss_avg = []
    loss1_avg = []
    loss2_avg = []
    loss3_avg = []
    count = 0
    t_start = time.time()
    while True:
        try:
            imgs, lbs, _ = next(diter)
        except StopIteration:
            diter = iter(dl)
            imgs, lbs, _ = next(diter)

        net.train()
        BNNeck.train()
        imgs = imgs.cuda()
        lbs = lbs.cuda()

        optim.zero_grad()
        embds = net(imgs)
        anchor, positives, negatives = selector(embds, lbs)

        x = torch.squeeze(embds)
        BNNeck1 = BNNeck(x)
        logits = classifier(BNNeck1)

        loss1 = triplet_loss(anchor, positives, negatives)
        loss2 = criterion(logits, lbs)
        loss3 = center_criterion(embds, lbs)
        loss = loss1 + loss2 + 0.0005 * loss3
        loss.backward()
        optim.step()

        loss_avg.append(loss.detach().cpu().numpy())
        loss1_avg.append(loss1.detach().cpu().numpy())
        loss2_avg.append(loss2.detach().cpu().numpy())
        loss3_avg.append(loss3.detach().cpu().numpy())
        if count % 20 == 0 and count != 0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            loss1_avg = sum(loss1_avg) / len(loss1_avg)
            loss2_avg = sum(loss2_avg) / len(loss2_avg)
            loss3_avg = sum(loss3_avg) / len(loss3_avg)
            t_end = time.time()
            time_interval = t_end - t_start
            logger.info('iter: {}, loss1: {:4f}, loss2: {:4f}, loss3: {:4f}, loss: {:4f}, lr: {:4f}, time: {:3f}'.format(
                count, loss1_avg, loss2_avg, loss3_avg, loss_avg, optim.lr, time_interval))
            loss_avg = []
            loss1_avg = []
            loss2_avg = []
            loss3_avg = []
            t_start = t_end

        count += 1
        if count == 25000:
            break

    ## dump model
    logger.info('saving trained model')
    torch.save(net.module.state_dict(), './res/model.pkl')
    torch.save(BNNeck.module.state_dict(), './res/BNNeck.pkl')
    logger.info('everything finished')
def train():
    ## setup
    torch.multiprocessing.set_sharing_strategy('file_system')
    if not os.path.exists('./res'):
        os.makedirs('./res')

    ## model and loss
    logger.info('setting up backbone model and loss')
    net = EmbedNetwork().cuda()
    net = nn.DataParallel(net)
    triplet_loss = TripletLoss(margin=None).cuda()  # no margin means soft-margin

    ## optimizer
    logger.info('creating optimizer')
    optim = AdamOptimWrapper(net.parameters(), lr=3e-4, wd=0, t0=15000, t1=25000)

    ## dataloader
    selector = BatchHardTripletSelector()
    ds = Market1501('datasets/Market-1501-v15.09.15/bounding_box_train', is_train=True)
    sampler = BatchSampler(ds, 18, 4)
    dl = DataLoader(ds, batch_sampler=sampler, num_workers=4)
    diter = iter(dl)

    ## train
    logger.info('start training ...')
    loss_avg = []
    count = 0
    t_start = time.time()
    while True:
        try:
            imgs, lbs, _ = next(diter)
        except StopIteration:
            diter = iter(dl)
            imgs, lbs, _ = next(diter)

        net.train()
        imgs = imgs.cuda()
        lbs = lbs.cuda()

        embds = net(imgs)
        anchor, positives, negatives = selector(embds, lbs)
        loss = triplet_loss(anchor, positives, negatives)

        optim.zero_grad()
        loss.backward()
        optim.step()

        loss_avg.append(loss.detach().cpu().numpy())
        if count % 20 == 0 and count != 0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            t_end = time.time()
            time_interval = t_end - t_start
            logger.info('iter: {}, loss: {:4f}, lr: {:4f}, time: {:3f}'.format(
                count, loss_avg, optim.lr, time_interval))
            loss_avg = []
            t_start = t_end

        count += 1
        if count == 25000:
            break

    ## dump model
    logger.info('saving trained model')
    torch.save(net.module.state_dict(), './res/model.pkl')
    logger.info('everything finished')
lr = 0.001
lr_step = num_epochs // 5
lr_gamma = 0.5

# paths
csv_path = '../hw4_data/train.csv'
data_dir = '../hw4_data/train'
val_csv_path = '../hw4_data/val.csv'
val_data_dir = '../hw4_data/val'
model_path = './models/best_model_M1_q3.pth'
generator_path = './models/best_generator_M1_q3.pth'


# In[27]:


train_dataset = MiniDataset(csv_path, data_dir)
train_sampler = BatchSampler('train', train_dataset.labels, N_way, sample_per_class, episodes)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler)


# In[28]:


testcase_csv = '../hw4_data/val_testcase.csv'
test_dataset = MiniDataset(val_csv_path, val_data_dir)

# fix random seeds for reproducibility
SEED = 123
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
random.seed(SEED)
class LogisticRegression(Predictor):
    def __init__(self):
        super().__init__()
        self.weights = None
        self.bias = None
        self.batch_sampler = None
        self.metrics_tracker = MetricsTracker()

    def train(self, train_x, train_y, nb_epochs=1000, batch_size=1000, lr=0.1, lambd=0.000):
        self.weights = np.zeros(shape=(train_x.shape[0], 1))
        self.bias = 0
        self.batch_sampler = BatchSampler(batch_size)
        costs = []
        accuracies = []
        for epoch in range(nb_epochs):
            lr_decay = lr * (nb_epochs - epoch) / nb_epochs
            batch_x, batch_y = self.batch_sampler.sample(train_x, train_y)
            print("Shape batch_x: " + str(batch_x.shape))
            print("Shape batch_y: " + str(batch_y.shape))
            y_hat = self.predict(batch_x)
            cost = self.__compute_cost(y_hat, batch_y, lambd)
            grad_w, grad_b = self.__compute_grad(batch_x, y_hat, batch_y, lambd)
            self.weights = self.weights - lr_decay * grad_w
            self.bias = self.bias - lr_decay * grad_b
            if epoch % 100 == 0:
                prediction = self.predict(train_x) > 0.5
                accuracy = self.metrics_tracker.accuracy(train_y, prediction)
                print("Epoch: " + str(epoch))
                print("Cost: " + str(cost))
                print("Gradients (W, b): " + str(grad_w) + ", " + str(grad_b))
                print("Weights: " + str(self.weights) + ", " + str(self.bias))
                print("Accuracy: " + str(accuracy))
                costs.append(cost)
                accuracies.append(accuracy)
        return accuracies, costs

    def predict(self, X):
        a = np.dot(self.weights.T, X) + self.bias
        return self.__sigmoid(a)

    def __compute_cost(self, y_hat, y, lambd):
        eps = 10e-5
        return -np.mean(y * np.log(y_hat + eps) + (1 - y) * np.log(1 - y_hat + eps)) \
               + 0.5 * lambd * np.sum(self.weights ** 2)

    def __compute_grad(self, X, y_hat, Y, lambd):
        m = X.shape[1]
        grad_w = (1 / m) * np.dot(X, (y_hat - Y).T) + lambd * self.weights
        grad_b = (1 / m) * np.sum(y_hat - Y)
        print("Shape grad_w: " + str(grad_w.shape))
        print("Shape grad_b: " + str(grad_b.shape))
        assert grad_w.shape == self.weights.shape
        return grad_w, grad_b

    def __sigmoid(self, a):
        return 1 / (1 + np.exp(-a))
class MLP(Predictor):
    def __init__(self, n_x, n_hidden_layers, neurons_per_layer, n_output, activations):
        super().__init__()
        self.initialize_parameters(n_x, n_hidden_layers, neurons_per_layer, n_output)
        self.batch_sampler = None
        self.activations = activations
        self.act_dict = {"sigmoid": self.__sigmoid, "relu": self.__relu, "linear": self.__linear}
        self.metrics_tracker = MetricsTracker()
        self.n_layers = n_hidden_layers + 2
        assert len(activations) == n_hidden_layers + 1

    def train(self, train_x, train_y, nb_epochs=1000, batch_size=1000, lr=0.1):
        self.batch_sampler = BatchSampler(batch_size)
        costs = []
        accuracies = []
        for epoch in range(nb_epochs):
            lr_decay = lr * (nb_epochs - epoch) / nb_epochs
            batch_x, batch_y = self.batch_sampler.sample(train_x, train_y)
            print("Shape batch_x: " + str(batch_x.shape))
            print("Shape batch_y: " + str(batch_y.shape))
            cost, y_hat = self.__forward_prop(batch_x, batch_y)
            grad = self.__backward_prop(batch_x, batch_y, y_hat)
            # apply the decayed learning rate computed above
            self.learning_update(grad, lr_decay)
            if epoch % 100 == 0:
                prediction = self.predict(train_x) > 0.5
                accuracy = self.metrics_tracker.accuracy(train_y, prediction)
                print("Epoch: " + str(epoch))
                print("Cost: " + str(cost))
                print("Gradients: " + str(grad))
                print("Weights: " + str(self.weights))
                print("Accuracy: " + str(accuracy))
                costs.append(cost)
                accuracies.append(accuracy)
        return accuracies, costs

    def initialize_parameters(self, n_x, n_hidden_layers, neurons_per_layer, n_output):
        assert len(neurons_per_layer) == n_hidden_layers + 1
        W0 = np.random.rand(neurons_per_layer[0], n_x)
        b0 = np.zeros(shape=(neurons_per_layer[0], 1))
        self.weights = {"W0": W0, "b0": b0}
        for i in range(n_hidden_layers):
            Wi = np.random.rand(neurons_per_layer[i + 1], neurons_per_layer[i])
            bi = np.zeros(shape=(neurons_per_layer[i + 1], 1))
            self.weights["W" + str(i + 1)] = Wi
            self.weights["b" + str(i + 1)] = bi
        W_output = np.random.rand(n_output, neurons_per_layer[n_hidden_layers])
        b_output = np.zeros(shape=(n_output, 1))
        self.weights["W" + str(n_hidden_layers + 1)] = W_output
        self.weights["b" + str(n_hidden_layers + 1)] = b_output

    def __sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def __relu(self, x):
        return np.maximum(0, x)

    def __linear(self, x):
        return x

    def __forward_prop(self, batch_x, batch_y):
        A_last = batch_x
        for i in range(self.n_layers):
            Wi = self.weights["W" + str(i)]
            bi = self.weights["b" + str(i)]
            Z = np.dot(Wi, A_last) + bi
            # the final layer is assumed to use a sigmoid so that the
            # cross-entropy cost below is well defined
            if i < len(self.activations):
                A_output = self.act_dict[self.activations[i]](Z)
            else:
                A_output = self.__sigmoid(Z)
            A_last = A_output
        m = batch_x.shape[1]
        eps = 10e-5
        cost = (-1 / m) * np.sum(batch_y * np.log(A_output + eps)
                                 + (1 - batch_y) * np.log(1 - A_output + eps))
        assert A_output.shape == (1, m)
        cost = np.squeeze(cost)
        assert cost.shape == ()
        return cost, A_output

    def __backward_prop(self, batch_x, batch_y, y_hat):
        # body missing in the original source
        raise NotImplementedError

    def predict(self, X):
        a = np.dot(self.weights.T, X) + self.bias
        return self.__sigmoid(a)

    def __compute_cost(self, y_hat, y, lambd):
        eps = 10e-5
        return -np.mean(y * np.log(y_hat + eps) + (1 - y) * np.log(1 - y_hat + eps)) \
               + 0.5 * lambd * np.sum(self.weights ** 2)

    def __compute_grad(self, X, y_hat, Y, lambd):
        m = X.shape[1]
        grad_w = (1 / m) * np.dot(X, (y_hat - Y).T) + lambd * self.weights
        grad_b = (1 / m) * np.sum(y_hat - Y)
        print("Shape grad_w: " + str(grad_w.shape))
        print("Shape grad_b: " + str(grad_b.shape))
        assert grad_w.shape == self.weights.shape
        return grad_w, grad_b