def load_data(batch_size):
    """Wrap the module-level train/validation splits in DataLoaders.

    Training batches are reshuffled every epoch; validation keeps a
    fixed order so evaluation is reproducible.

    Args:
        batch_size: mini-batch size used for both loaders.

    Returns:
        (train_loader, val_loader)
    """
    loaders = []
    for feats, labels, reshuffle in (
        (training_data, training_labels, True),
        (validation_data, validation_labels, False),
    ):
        loaders.append(
            DataLoader(AdultDataset(feats, labels),
                       batch_size=batch_size,
                       shuffle=reshuffle))
    train_loader, val_loader = loaders
    return train_loader, val_loader
def load_data(batch_size):
    """Build train/validation DataLoaders over the module-level splits.

    Args:
        batch_size: mini-batch size for both loaders.

    Returns:
        (train_loader, val_loader)
    """
    trainDataSet = AdultDataset(trainingData, trainingLabel)
    validationDataSet = AdultDataset(validationData, validationLabel)
    train_loader = DataLoader(trainDataSet, batch_size=batch_size, shuffle=True)
    # Fix: the validation loader was shuffled (shuffle=True). Shuffling a
    # held-out set gains nothing — metrics are order-independent — and it
    # makes the per-epoch evaluation pass non-deterministic in order.
    val_loader = DataLoader(validationDataSet, batch_size=batch_size,
                            shuffle=False)
    return train_loader, val_loader
def load_data(batch_size):
    """Return (train_loader, val_loader) over the pre-split Adult data."""
    ######
    # 3.2 YOUR CODE HERE
    datasets = {
        "train": AdultDataset(feat_train, label_train),
        "valid": AdultDataset(feat_valid, label_valid),
    }
    ######
    # Training is reshuffled each epoch; validation order stays fixed.
    train_loader = DataLoader(datasets["train"],
                              batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(datasets["valid"],
                            batch_size=batch_size, shuffle=False)
    return train_loader, val_loader
def load_data(batch_size, train_data, val_data, train_labels, val_labels):
    """Build train/validation DataLoaders from explicit data splits.

    Args:
        batch_size: mini-batch size for both loaders; the validation
            loader batches too (batching only affects eval throughput,
            not the metrics).
        train_data, train_labels: training features and targets.
        val_data, val_labels: validation features and targets.

    Returns:
        (train_loader, val_loader)
    """
    ######
    # 3.2 YOUR CODE HERE
    # Fix: the original rebound the `train_data`/`val_data` parameters to
    # the Dataset wrappers; use distinct local names so the raw inputs
    # are not shadowed.
    train_dataset = AdultDataset(train_data, train_labels)
    val_dataset = AdultDataset(val_data, val_labels)
    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size,
                            shuffle=False)
    ######
    return train_loader, val_loader
def load_data(batch_size=5):
    """Build train/validation DataLoaders over the module-level splits.

    Args:
        batch_size: mini-batch size (default 5), now used for BOTH
            loaders.

    Returns:
        (train_loader, val_loader)
    """
    ######
    train_loader = DataLoader(AdultDataset(X_train, Y_train),
                              batch_size=batch_size, shuffle=True)
    # Fix: the validation loader used batch_size=1 and shuffle=True.
    # Single-sample batches make evaluation needlessly slow, and shuffling
    # a held-out set changes nothing but determinism. Metrics are
    # unaffected by either change.
    val_loader = DataLoader(AdultDataset(X_test, Y_test),
                            batch_size=batch_size, shuffle=False)
    ######
    return train_loader, val_loader
def load_data(batch_size):
    """Build training and validation DataLoaders.

    NOTE(review): the "validation" split here is X_test/Y_test — the
    module-level test arrays; confirm that is the intended hold-out.

    Args:
        batch_size: mini-batch size for both loaders.

    Returns:
        (train_loader, val_loader)
    """
    training_set = AdultDataset(X_train, y_train)
    train_loader = DataLoader(training_set, batch_size=batch_size,
                              shuffle=True)
    validation_set = AdultDataset(X_test, y_test)
    # Fix: validation was shuffled (shuffle=True); evaluation is
    # order-independent, so keep a fixed order for reproducibility.
    val_loader = DataLoader(validation_set, batch_size=batch_size,
                            shuffle=False)
    return train_loader, val_loader
def load_data(batchsize):
    """Return (train_loader, val_loader) for the Adult income task."""
    ######
    # 3.2 YOUR CODE HERE
    train_set = AdultDataset(training_features, training_income)
    valid_set = AdultDataset(validation_features, validation_income)
    # Reshuffle training batches every epoch; the validation loader
    # relies on DataLoader's default shuffle=False.
    train_loader = DataLoader(train_set, batch_size=batchsize, shuffle=True)
    val_loader = DataLoader(valid_set, batch_size=batchsize)
    ######
    return train_loader, val_loader
def load_data(batch_size):
    """Return (train_loader, val_loader) over the pre-split Adult data."""
    ######
    # 4.2 YOUR CODE HERE
    train_set = AdultDataset(feat_train, label_train)
    valid_set = AdultDataset(feat_valid, label_valid)
    # Re-seed immediately before loader construction so the training
    # shuffle order is identical across runs (seed is a module-level
    # value — presumably the experiment seed; confirm at caller).
    torch.manual_seed(seed)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False)
    ######
    return train_loader, val_loader
def load_data(batch_size):
    """Build train/validation DataLoaders over the module-level splits.

    Args:
        batch_size: mini-batch size for both loaders.

    Returns:
        (train_loader, val_loader)
    """
    ######
    # 4.1 YOUR CODE HERE
    trainDataset = AdultDataset(trainData, trainLabel)
    valDataset = AdultDataset(valData, valLabel)
    train_loader = DataLoader(dataset=trainDataset, batch_size=batch_size,
                              shuffle=True)
    # Fix: the validation loader was shuffled (shuffle=True). A held-out
    # set needs no shuffling — metrics are order-independent — and a fixed
    # order keeps evaluation deterministic.
    val_loader = DataLoader(dataset=valDataset, batch_size=batch_size,
                            shuffle=False)
    ######
    return train_loader, val_loader
def load_data(batch_size, lr):
    """Assemble the complete training setup for the Adult task.

    Args:
        batch_size: mini-batch size for both loaders.
        lr: SGD learning rate.

    Returns:
        (train_loader, test_loader, model, loss_fnc, optimizer)
    """
    loaders = []
    for feats, labels, reshuffle in ((X_train, y_train, True),
                                     (X_test, y_test, False)):
        loaders.append(
            DataLoader(AdultDataset(feats, labels),
                       batch_size=batch_size,
                       num_workers=1,
                       shuffle=reshuffle))
    train_loader, test_loader = loaders
    # Binary cross-entropy; input width taken from the feature matrix.
    loss_fnc = torch.nn.BCELoss()
    model = MultiLayerPerceptron(X_train.shape[1])
    # Plain SGD over only the trainable parameters.
    trainable = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.SGD(trainable, lr=lr)
    return train_loader, test_loader, model, loss_fnc, optimizer
# --- Experiment setup: threading, logging, seeding, and data loading. ---
# Cap PyTorch intra-op threads for this run.
torch.set_num_threads(8)
logger = get_logger(args.name)
# Set random number seed.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
# Module-level dtype constant — presumably used by downstream array
# construction; confirm against later code (not visible here).
dtype = np.float32
logger.info(
    "UCI Adult data set, target attribute: {}, sensitive attribute: {}".format(
        args.target, args.private))
# Load UCI Adult dataset.
time_start = time.time()
# Each AdultDataset reads its split from ./data, parameterized by the
# prediction target and the sensitive (private) attribute.
adult_train = AdultDataset(root_dir='data',
                           phase='train',
                           tar_attr=args.target,
                           priv_attr=args.private)
adult_test = AdultDataset(root_dir='data',
                          phase='test',
                          tar_attr=args.target,
                          priv_attr=args.private)
# Training batches are reshuffled every epoch; test order stays fixed.
train_loader = DataLoader(adult_train, batch_size=args.batch_size, shuffle=True)
test_loader = DataLoader(adult_test, batch_size=args.batch_size, shuffle=False)
time_end = time.time()
logger.info(
    "Time used to load all the data sets: {} seconds.".format(
        time_end - time_start))
# Model dimensions: input width is exposed by the dataset; the target is
# binary (two classes).
input_dim = adult_train.xdim
num_classes = 2
def main():
    """Train the MLP on the Adult data, logging and (optionally) plotting
    per-evaluation loss/accuracy curves.

    Command-line flags: --batch_size and --lr are required (no defaults);
    --epochs defaults to 20 and --eval_every (batches between
    evaluations) to 10.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', type=int)
    parser.add_argument('--lr', type=float)
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--eval_every', type=int, default=10)
    args = parser.parse_args()
    ######
    train_loader, val_loader = load_data(args.batch_size)
    model, loss_fnc, optimizer = load_model(args.lr)
    # Per-evaluation histories: training/validation loss and accuracy.
    t_loss, v_loss, t_acc, v_acc = [], [], [], []
    for epoch in range(args.epochs):
        n = args.eval_every
        printI = 1   # batches seen since the last evaluation
        lastN = []   # sliding window of the most recent n [datum, label] batches
        for datum, label in train_loader:
            # set up optimizer with blank gradients
            optimizer.zero_grad()
            # predict with the current weights
            predict = model(datum.float())
            # evaluate the loss for the predictions
            loss = loss_fnc(input=predict.squeeze(), target=label.float())
            # calculate the gradients per the loss function we chose
            loss.backward()
            # changes the weights one step in the right direction (for the batch)
            optimizer.step()
            # Keep only the last n batches for the training-accuracy
            # estimate computed at evaluation time.
            if len(lastN) < n:
                lastN += [[datum, label]]
            else:
                lastN = lastN[1:] + [[datum, label]]
            if printI == n:
                # Record the current batch's training loss and the loss
                # over the full held-out set. X_test/Y_test are
                # module-level globals — presumably the validation split;
                # confirm (val_loader is created but unused here).
                t_loss += [loss.item()]
                predict = model(X_test.float())
                v_loss += [
                    loss_fnc(input=predict.squeeze(),
                             target=Y_test.float()).item()
                ]
                # Training accuracy: mean per-batch accuracy over the
                # last n batches, each re-wrapped as a one-off DataLoader.
                t_acc_this = 0
                for every in range(n):
                    t_acc_this += evaluate(
                        model,
                        DataLoader(
                            AdultDataset(lastN[every][0], lastN[every][1])))
                t_acc += [t_acc_this / n]
                v_acc += [
                    evaluate(model, DataLoader(AdultDataset(X_test, Y_test)))
                ]
                # NOTE(review): printI is reset to 1 here and then still
                # incremented at the bottom of the loop, so after the
                # first evaluation subsequent ones fire every n-1 batches
                # rather than every n — confirm this is intended.
                printI = 1
                print("Status update: currently on epoch", epoch,
                      "and Loss is", loss)
                # NOTE(review): t_acc[-1] is an accuracy over the last n
                # batches, but it is scaled by len(Y_train) (the whole
                # training set) — the printed count looks inflated; verify.
                print("Number of correct predictions is ",
                      t_acc[-1] * len(Y_train))
                print("Validation accuracy is currently: ", v_acc[-1])
            printI += 1
    # `plotting` is a module-level flag not visible in this chunk — TODO
    # confirm where it is defined.
    if plotting:
        # Plot losses
        plt.figure()
        x = [i for i in range(len(t_loss))]
        plt.plot(x, t_loss, label='Training Loss')
        plt.plot(x, v_loss, label='Validation Loss')
        plt.title("Losses as Function of Gradient Step")
        plt.ylabel("Loss")
        plt.xlabel("Gradient Steps")
        plt.legend()
        plt.show()
        # Plot accuracies
        x = [i for i in range(len(t_acc))]
        plt.figure()
        plt.plot(x, t_acc, label='Training Accuracy')
        plt.plot(x, v_acc, label='Validation Accuracy')
        plt.ylim(0, 1)
        plt.title("Accuracy as Function of Gradient Step")
        plt.ylabel("Accuracy")
        plt.xlabel("Gradient Steps")
        plt.legend()
        plt.show()
        # Plot losses smoothed. smooth() is an external helper; the
        # numeric arguments look like window length / polynomial order —
        # TODO confirm against its definition.
        plt.figure()
        x = [i for i in range(len(t_loss))]
        plt.plot(x, smooth(t_loss, 151, 3), label='Training Loss Smoothed')
        plt.plot(x, smooth(v_loss, 101, 4), label='Validation Loss')
        plt.title("Smoothed Losses as Function of Gradient Step")
        plt.ylabel("Loss")
        plt.xlabel("Gradient Steps")
        plt.legend()
        plt.show()
        # Plot accuracies smoothed
        plt.figure()
        plt.plot(x, smooth(t_acc, 151, 5), label='Training Accuracy Smoothed')
        plt.plot(x, smooth(v_acc, 101, 4), label='Validation Accuracy Smoothed')
        plt.ylim(0, 1)
        plt.title("Smoothed Accuracy as Function of Gradient Step")
        plt.ylabel("Accuracy")
        plt.xlabel("Gradient Steps")
        plt.legend()
        plt.show()