def main():
    train_ds = MyImageFolder(root_dir="train/", transform=config.train_transforms)
    val_ds = MyImageFolder(root_dir="val/", transform=config.val_transforms)
    train_loader = DataLoader(
        train_ds,
        batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS,
        pin_memory=config.PIN_MEMORY,
        shuffle=True,
    )
    val_loader = DataLoader(
        val_ds,
        batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS,
        pin_memory=config.PIN_MEMORY,
        shuffle=True,
    )
    loss_fn = nn.CrossEntropyLoss()
    model = Net(net_version="b0", num_classes=10).to(config.DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=config.LEARNING_RATE)
    scaler = torch.cuda.amp.GradScaler()

    if config.LOAD_MODEL:
        load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)
        make_prediction(model, config.val_transforms, 'test/', config.DEVICE)
        check_accuracy(val_loader, model, config.DEVICE)

    for epoch in range(config.NUM_EPOCHS):
        train_fn(train_loader, model, optimizer, loss_fn, scaler, config.DEVICE)
        check_accuracy(val_loader, model, config.DEVICE)
        checkpoint = {
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        save_checkpoint(checkpoint)
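# The classification scripts in this collection call a helper like
# check_accuracy(loader, model, device). A minimal sketch of what such a helper
# might look like for multi-class classification; this is an assumption for
# illustration, not the actual implementation used by any of these repos.
import torch

def check_accuracy_sketch(loader, model, device):
    num_correct, num_samples = 0, 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            preds = model(x).argmax(dim=1)   # predicted class per sample
            num_correct += (preds == y).sum().item()
            num_samples += y.size(0)
    model.train()
    return num_correct / num_samples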
def apply_KMM(trainX, trainY, testX, testY, window, source_pos, target_pos):
    # Decision Tree
    print("\n Kernel Mean Matching (Huang et al., 2006) ")
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_KMM, acc_DT_KMM_INFO = check_accuracy(testY, pred_naive)

    # Logistic Regression
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_KMM, acc_LR_KMM_INFO = check_accuracy(testY, pred_naive)

    # Naive Bayes Bernoulli
    classifier = ImportanceWeightedClassifier(iwe='kmm', loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_KMM, acc_NB_KMM_INFO = check_accuracy(testY, pred_naive)

    return pd.DataFrame([{
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,
        'acc_LR_KMM': acc_LR_KMM, 'acc_LR_KMM_INFO': str(acc_LR_KMM_INFO),
        'acc_DT_KMM': acc_DT_KMM, 'acc_DT_KMM_INFO': str(acc_DT_KMM_INFO),
        'acc_NB_KMM': acc_NB_KMM, 'acc_NB_KMM_INFO': str(acc_NB_KMM_INFO),
    }])
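# apply_KMM and the other apply_* helpers below call check_accuracy(testY, pred)
# and unpack an (accuracy, info) pair. A minimal scikit-learn-based sketch of such
# a helper; the name and the contents of "info" are assumptions for illustration.
from sklearn.metrics import accuracy_score, classification_report

def check_accuracy_arrays_sketch(y_true, y_pred):
    acc = accuracy_score(y_true, y_pred)                               # overall accuracy
    info = classification_report(y_true, y_pred, output_dict=True)     # per-class metrics
    return acc, info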
def apply_SA(trainX, trainY, testX, testY, window, source_pos, target_pos):
    # Decision Tree
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_SA, acc_DT_SA_INFO = check_accuracy(testY, pred_naive)

    # Logistic Regression
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_SA, acc_LR_SA_INFO = check_accuracy(testY, pred_naive)

    # Naive Bayes Bernoulli
    print("\n Subspace Alignment (Fernando et al., 2013) ")
    classifier = SubspaceAlignedClassifier(loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_SA, acc_NB_SA_INFO = check_accuracy(testY, pred_naive)

    return pd.DataFrame([{
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,
        'acc_LR_SA': acc_LR_SA, 'acc_LR_SA_INFO': str(acc_LR_SA_INFO),
        'acc_DT_SA': acc_DT_SA, 'acc_DT_SA_INFO': str(acc_DT_SA_INFO),
        'acc_NB_SA': acc_NB_SA, 'acc_NB_SA_INFO': str(acc_NB_SA_INFO),
    }])
def main():
    model = UNET(in_channels=1, out_channels=1).to(device=DEVICE)
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    train_loader, val_loader = get_loaders(TRAIN_DIR, VAL_DIR, BATCH_SIZE, NUM_WORKER, PIN_MEMORY)

    if LOAD_MODEL:
        load_checkpoint(torch.load("mycheckpoint.pth.tar"), model)

    scaler = torch.cuda.amp.GradScaler()
    for epoch in range(NUM_EPOCHS):
        train_fn(train_loader, model, optimizer, loss_fn, scaler)

        # save model
        checkpoint = {
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        save_checkpoint(checkpoint)

        # check accuracy
        check_accuracy(val_loader, model, device=DEVICE)

        # print results to folder
        save_predictions_as_imgs(val_loader, model, folder='saved_imgs/')
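# save_checkpoint / load_checkpoint are assumed utilities throughout these training
# scripts. A minimal sketch of the usual pattern, matching the {'state_dict', 'optimizer'}
# dictionaries built above; the names with a _sketch suffix are illustrative, not the
# actual helpers from these repos.
import torch

def save_checkpoint_sketch(state, filename="my_checkpoint.pth.tar"):
    torch.save(state, filename)

def load_checkpoint_sketch(checkpoint, model, optimizer=None):
    model.load_state_dict(checkpoint["state_dict"])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint["optimizer"])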
def apply_NN(trainX, trainY, testX, testY, window, source_pos, target_pos):
    # Decision Tree
    print("\n Nearest-neighbour-based weighting (Loog, 2015) ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_NN, acc_DT_NN_INFO = check_accuracy(testY, pred_naive)

    # Logistic Regression
    print("\n Nearest-neighbour-based weighting (Loog, 2015) ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_NN, acc_LR_NN_INFO = check_accuracy(testY, pred_naive)

    # Naive Bayes Bernoulli
    print("\n Nearest-neighbour-based weighting (Loog, 2015) ")
    classifier = ImportanceWeightedClassifier(iwe='nn', loss="berno")
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_NN, acc_NB_NN_INFO = check_accuracy(testY, pred_naive)

    return pd.DataFrame([{
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,
        'acc_LR_NN': acc_LR_NN, 'acc_LR_NN_INFO': str(acc_LR_NN_INFO),
        'acc_DT_NN': acc_DT_NN, 'acc_DT_NN_INFO': str(acc_DT_NN_INFO),
        'acc_NB_NN': acc_NB_NN, 'acc_NB_NN_INFO': str(acc_NB_NN_INFO),
    }])
def main():
    train_transform = A.Compose([
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Rotate(limit=35, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ])
    val_transforms = A.Compose([
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ])

    model = UNET(3, 1).to(DEVICE)
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    train_loader, val_loader = get_loaders(
        TRAIN_IMG_DIR,
        TRAIN_MASK_DIR,
        VAL_IMG_DIR,
        VAL_MASK_DIR,
        BATCH_SIZE,
        train_transform,
        val_transforms,
        NUM_WORKERS,
        PIN_MEMORY,
    )
    check_accuracy(val_loader, model, device=DEVICE)
    scaler = torch.cuda.amp.GradScaler()

    for epoch in range(NUM_EPOCHS):
        train_fn(train_loader, model, optimizer, loss_fn, scaler)

        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        save_checkpoint(checkpoint)

        # check accuracy
        check_accuracy(val_loader, model, device=DEVICE)

        # print some examples to a folder
        save_predictions_as_imgs(val_loader, model, folder="saved_images/", device=DEVICE)
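# The segmentation mains above and below call check_accuracy(loader, model, device)
# on binary masks. A minimal sketch of a pixel-accuracy / Dice-score variant; it
# assumes masks arrive as (N, H, W) tensors and a single-logit output channel, which
# may differ from the actual helpers used in these repos.
import torch

def check_accuracy_segmentation_sketch(loader, model, device="cuda"):
    num_correct, num_pixels, dice_score = 0, 0, 0.0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device).unsqueeze(1)                       # (N, 1, H, W)
            preds = (torch.sigmoid(model(x)) > 0.5).float()     # binarize logits
            num_correct += (preds == y).sum().item()
            num_pixels += torch.numel(preds)
            dice_score += ((2 * (preds * y).sum()) / ((preds + y).sum() + 1e-8)).item()
    model.train()
    return num_correct / num_pixels, dice_score / len(loader)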
def main():
    train_transform = A.Compose([
        A.Resize(height=config.IMAGE_HEIGHT, width=config.IMAGE_WIDTH),
        A.Rotate(limit=35, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], max_pixel_value=255.0),
        ToTensorV2(),
    ])
    val_transform = A.Compose([
        A.Resize(height=config.IMAGE_HEIGHT, width=config.IMAGE_WIDTH),
        A.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], max_pixel_value=255.0),
        ToTensorV2(),
    ])

    model = UNet(in_channels=3, out_channels=1).to(config.DEVICE)
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.LEARNING_RATE)
    train_loader, val_loader = get_loaders(
        config.TRAIN_IMAGE_DIR,
        config.TRAIN_MASK_DIR,
        config.VAL_IMG_DIR,
        config.VAL_MASK_DIR,
        config.BATCH_SIZE,
        train_transform,
        val_transform,
    )

    if config.LOAD_MODEL:
        load_checkpoint(torch.load('my_checkpoint.pth.tar'), model)

    scaler = torch.cuda.amp.GradScaler()
    for epoch in range(config.NUM_EPOCHS):
        train_fn(train_loader, model, optimizer, loss_fn, scaler)

        # save model
        checkpoint = {
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        save_checkpoint(checkpoint)

        # check accuracy
        check_accuracy(val_loader, model, device=config.DEVICE)

        # print some examples to a folder
        save_predictions_as_imgs(val_loader, model, folder='saved_images', device=config.DEVICE)
def train(seq_len, window_size, model_type, params, batch_size=16, num_epochs=20, print_every=5):
    metrics = []
    max_val_f2_score = 0.
    best_model = None
    train_data, validation_data = load_pytorch_data(seq_len, window_size)

    if model_type == 'LSTM':
        model = SimpleLSTM(INPUT_SIZE, params['lstm_hidden_size'])
    elif model_type == 'CNN':
        model = SimpleCNN(int(HZ * seq_len), params['cnn_hidden_size'])
    else:
        raise Exception('invalid model type')

    optimizer = torch.optim.Adam(model.parameters(), lr=params['lr'])
    criterion = torch.nn.CrossEntropyLoss(weight=torch.tensor(params['loss_weights']))

    print('starting training!')
    for epoch in range(num_epochs):
        print('starting epoch {}...'.format(epoch))
        for iter, (X_batch, y_batch, idx) in enumerate(train_data):
            X_batch = X_batch.float()
            y_batch = y_batch.long()
            output = model(X_batch)
            output = torch.squeeze(output, 0)
            loss = criterion(output, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if iter % print_every == 0:
                # print('Iter {} loss: {}'.format(iter, loss.item()))
                f1_val, f2_val, precision_val, recall_val, accuracy_val = check_accuracy(
                    model, validation_data, False)
                f1_train, f2_train, precision_train, recall_train, accuracy_train = check_accuracy(
                    model, train_data, False)
                train_loss = loss.item()
                metrics.append('{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}'.format(
                    train_loss, f1_val, f2_val, precision_val, recall_val, accuracy_val,
                    f1_train, f2_train, precision_train, recall_train, accuracy_train))
                if f2_val > max_val_f2_score:
                    max_val_f2_score = f2_val
                    best_model = copy.deepcopy(model)

    print('finished training!')
    return best_model, max_val_f2_score, metrics
def apply_notl(trainX, trainY, testX, testY, window, source_pos, target_pos):
    ########################
    ### SEMI-SUPERVISED ###
    ########################

    # Label Propagation
    label_prop_model = LabelPropagation(kernel='knn')
    label_prop_model.fit(trainX, trainY)
    Y_Pred = label_prop_model.predict(testX)
    acc_ss_propagation, acc_ss_propagation_INFO = check_accuracy(testY, Y_Pred)

    # Label Spreading
    label_prop_models_spr = LabelSpreading(kernel='knn')
    label_prop_models_spr.fit(trainX, trainY)
    Y_Pred = label_prop_models_spr.predict(testX)
    acc_ss_spreading, acc_ss_spreading_INFO = check_accuracy(testY, Y_Pred)

    ########################
    #### WITHOUT TL ########
    ########################

    # LogisticRegression
    modelLR = LogisticRegression()
    modelLR.fit(trainX, trainY)
    predLR = modelLR.predict(testX)
    accLR, acc_LR_INFO = check_accuracy(testY, predLR)

    # DecisionTreeClassifier
    modelDT = tree.DecisionTreeClassifier()
    modelDT.fit(trainX, trainY)
    predDT = modelDT.predict(testX)
    accDT, acc_DT_INFO = check_accuracy(testY, predDT)

    # BernoulliNB
    modelNB = BernoulliNB()
    modelNB.fit(trainX, trainY)
    predNB = modelNB.predict(testX)
    accNB, acc_NB_INFO = check_accuracy(testY, predNB)

    return pd.DataFrame([{
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,
        'acc_SS_propagation': acc_ss_propagation,
        'acc_SS_propagation_INFO': acc_ss_propagation_INFO,
        'acc_SS_spreading': acc_ss_spreading,
        'acc_SS_spreading_INFO': acc_ss_spreading_INFO,
        'acc_LR': accLR, 'acc_LR_INFO': str(acc_LR_INFO),
        'acc_DT': accDT, 'acc_DT_INFO': str(acc_DT_INFO),
        'acc_NB': accNB, 'acc_NB_INFO': str(acc_NB_INFO),
    }])
def main():
    train_transforms = A.Compose([
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Rotate(limit=35, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], max_pixel_value=255.0),
        ToTensorV2(),
    ])
    val_transforms = A.Compose([
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], max_pixel_value=255.0),
        ToTensorV2(),
    ])

    model = UNET(in_channels=3, out_channels=1).to(DEVICE)
    # loss_fn = nn.BCEWithLogitsLoss()  # plain BCE, superseded by the combined loss below
    loss_fn = ComboLoss({'bce': 0.4, 'dice': 0.5, 'focal': 0.1})
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    train_loader, val_loader = get_loaders(
        TRAIN_IMG_DIR,
        TRAIN_MASK_DIR,
        VAL_IMG_DIR,
        VAL_MASK_DIR,
        BATCH_SIZE,
        train_transforms,
        val_transforms,
    )

    if LOAD_MODEL:
        load_checkpoint(torch.load('checkpoint.pth'), model)

    scaler = torch.cuda.amp.GradScaler()
    for epoch in range(NUM_EPOCHS):
        train_fn(train_loader, model, optimizer, loss_fn, scaler)

        # save model
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        # save_checkpoint(checkpoint)

        # check accuracy
        check_accuracy(val_loader, model, DEVICE)

        # print some examples to a folder
        save_predictions_as_imgs(val_loader, model, device=DEVICE)
def train_test_classifier():
    w = ut.train_model(x_train, y_train, x_control_train, loss_function,
                       apply_fairness_constraints, apply_accuracy_constraint,
                       sep_constraint, sensitive_attrs,
                       sensitive_attrs_to_cov_thresh, gamma)
    train_score, test_score, correct_answers_train, correct_answers_test = ut.check_accuracy(
        w, x_train, y_train, x_test, y_test, None, None)

    distances_boundary_test = (np.dot(x_test, w)).tolist()
    all_class_labels_assigned_test = np.sign(distances_boundary_test)
    correlation_dict_test = ut.get_correlations(
        None, None, all_class_labels_assigned_test, x_control_test, sensitive_attrs)
    cov_dict_test = ut.print_covariance_sensitive_attrs(
        None, x_test, distances_boundary_test, x_control_test, sensitive_attrs)
    p_rule = ut.print_classifier_fairness_stats(
        [test_score], [correlation_dict_test], [cov_dict_test], sensitive_attrs[0])

    eq_op_acc, chance_bin_zero, chance_bin_one = ut.get_eq_op_acc(
        w, x_train, y_train, x_control_train, None)
    eq_odds_acc = ut.get_eq_odds_acc(w, x_train, y_train, x_control_train, None)
    pred_rate_par_acc = ut.get_pred_rate_par_acc(w, x_train, y_train, x_control_train, None)
    demo_par_acc_f_cons = ut.get_dem_par_acc(w, x_train, y_train, x_control_train, None)

    return w, p_rule, test_score, eq_op_acc, eq_odds_acc, pred_rate_par_acc, demo_par_acc_f_cons
def train(self, X, y, x_sensitive, fairness_constraint):
    self.x_sensitive = {"s1": x_sensitive}
    self.X = ut.add_intercept(X)
    self.y = y

    if fairness_constraint == -1.0:
        self.w = ut.train_model(self.X, self.y, self.x_sensitive, lf._logistic_loss,
                                0, 0, 0, ["s1"], {"s1": 0}, None)
    else:
        self.w = ut.train_model(self.X, self.y, self.x_sensitive, lf._logistic_loss,
                                1, 0, 0, ["s1"], {"s1": fairness_constraint}, None)

    train_score, test_score, correct_answers_train, correct_answers_test = ut.check_accuracy(
        self.w, self.X, self.y, self.X, self.y, None, None)

    distances_boundary_test = (np.dot(self.X, self.w)).tolist()
    all_class_labels_assigned_test = np.sign(distances_boundary_test)
    correlation_dict_test = ut.get_correlations(
        None, None, all_class_labels_assigned_test, self.x_sensitive, ["s1"])
    correlation_dict = ut.get_avg_correlation_dict([correlation_dict_test])
    non_prot_pos = correlation_dict["s1"][1][1]
    prot_pos = correlation_dict["s1"][0][1]
    p_rule = (prot_pos / non_prot_pos) * 100.0

    return self.w, p_rule, 100.0 * test_score
def main():
    args = ParseArgs()
    dataset_name = args.dataset_name
    lambda_ = args.lambda_
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    trainloader, testloader, num_classes = Dataset(dataset_name)
    model = torch.load(args.ckpt).to(device)

    weights = [w for name, w in model.named_parameters() if "weight" in name]
    num_features = sum([w.numel() for w in weights])

    criterion = torch.nn.CrossEntropyLoss()
    F, f, norm_l1_x = compute_F(trainloader, model, weights, criterion, lambda_)
    density = sum([torch.sum(w != 0).item() for w in weights]) / num_features
    accuracy = check_accuracy(model, testloader)

    print('F:', F)
    print('f:', f)
    print('density:', density)
    print('validation accuracy:', accuracy)
def get_clf_stats(w, x_train, y_train, x_control_train, x_test, y_test, x_control_test, sensitive_attrs):
    # compute distances to boundaries
    distances_boundary_train = get_distance_boundary(w, x_train, None)
    distances_boundary_test = get_distance_boundary(w, x_test, None)

    # compute the class labels
    all_class_labels_assigned_train = np.sign(distances_boundary_train)
    all_class_labels_assigned_test = np.sign(distances_boundary_test)
    train_score, test_score, correct_answers_train, correct_answers_test = ut.check_accuracy(
        None, x_train, y_train, x_test, y_test,
        all_class_labels_assigned_train, all_class_labels_assigned_test)

    cov_all_train = {}
    cov_all_test = {}

    print("\n")
    print("Overall Accuracy: %0.3f" % (test_score))

    for s_attr in sensitive_attrs:
        s_attr_to_fp_fn_train = get_fpr_fnr_sensitive_features(
            y_train, all_class_labels_assigned_train, x_control_train, sensitive_attrs, False)
        cov_all_train[s_attr] = get_sensitive_attr_constraint_fpr_fnr_cov(
            None, x_train, y_train, distances_boundary_train, x_control_train[s_attr])

        print_stats = True  # only print stats for the test fold
        s_attr_to_fp_fn_test = get_fpr_fnr_sensitive_features(
            y_test, all_class_labels_assigned_test, x_control_test, sensitive_attrs, False)
        cov_all_test[s_attr] = get_sensitive_attr_constraint_fpr_fnr_cov(
            None, x_test, y_test, distances_boundary_test, x_control_test[s_attr])

    return train_score, test_score, cov_all_train, cov_all_test, s_attr_to_fp_fn_train, s_attr_to_fp_fn_test
def part_loss(output, target, loss_f, join_channels=slice(0, 3), normal_channels=slice(3, None), train_reduced=False):
    output_normal = output[:, normal_channels]
    target_normal = target[:, normal_channels]

    max_output, _ = output[:, join_channels].max(1, keepdim=True)
    output = torch.cat((output_normal, max_output), dim=1)
    max_target, _ = target[:, join_channels].max(1, keepdim=True)
    target = torch.cat((target_normal, max_target), dim=1)

    acc = check_accuracy(output, target)
    return (loss_f(output, target) if train_reduced else loss_f(output_normal, target_normal)), acc
def train_test_classifier():
    w = ut.train_model(x_train, y_train, x_control_train, loss_function,
                       apply_fairness_constraints, apply_accuracy_constraint,
                       sep_constraint, sensitive_attrs,
                       sensitive_attrs_to_cov_thresh, gamma)
    train_score, test_score, correct_answers_train, correct_answers_test = ut.check_accuracy(
        w, x_train, y_train, x_test, y_test, None, None)

    distances_boundary_test = (np.dot(x_test, w)).tolist()
    all_class_labels_assigned_test = np.sign(distances_boundary_test)
    correlation_dict_test = ut.get_correlations(
        None, None, all_class_labels_assigned_test, x_control_test, sensitive_attrs)
    cov_dict_test = ut.print_covariance_sensitive_attrs(
        None, x_test, distances_boundary_test, x_control_test, sensitive_attrs)
    p_rule = ut.print_classifier_fairness_stats(
        [test_score], [correlation_dict_test], [cov_dict_test], sensitive_attrs[0])

    return w, p_rule, test_score
def get_clf_stats(w, x_train, y_train, x_control_train, x_test, y_test, x_control_test, sensitive_attrs):
    assert len(sensitive_attrs) == 1  # ensure that we have just one sensitive attribute
    sensitive_attrs = list(sensitive_attrs)
    s_attr = sensitive_attrs[0]  # for now, lets compute the accuracy for just one sensitive attr

    # compute distance from boundary
    distances_boundary_train = get_distance_boundary(w, x_train, x_control_train[s_attr])
    distances_boundary_test = get_distance_boundary(w, x_test, x_control_test[s_attr])

    # compute the class labels
    all_class_labels_assigned_train = np.sign(distances_boundary_train)
    all_class_labels_assigned_test = np.sign(distances_boundary_test)
    train_score, test_score, correct_answers_train, correct_answers_test = ut.check_accuracy(
        None, x_train, y_train, x_test, y_test,
        all_class_labels_assigned_train, all_class_labels_assigned_test)

    cov_all_train = {}
    cov_all_test = {}

    for s_attr in sensitive_attrs:
        print_stats = False  # we arent printing the stats for the train set to avoid clutter
        # uncomment these lines to print stats for the train fold
        # print("*** Train ***")
        # print("Accuracy: %0.3f" % (train_score))
        # print_stats = True
        s_attr_to_fp_fn_train = get_fpr_fnr_sensitive_features(
            y_train, all_class_labels_assigned_train, x_control_train, sensitive_attrs, print_stats)
        cov_all_train[s_attr] = get_sensitive_attr_constraint_fpr_fnr_cov(
            None, x_train, y_train, distances_boundary_train, x_control_train[s_attr])

        # print("\n")
        # print("Accuracy: %0.3f" % (test_score))
        print_stats = False  # only print stats for the test fold
        s_attr_to_fp_fn_test = get_fpr_fnr_sensitive_features(
            y_test, all_class_labels_assigned_test, x_control_test, sensitive_attrs, print_stats)
        cov_all_test[s_attr] = get_sensitive_attr_constraint_fpr_fnr_cov(
            None, x_test, y_test, distances_boundary_test, x_control_test[s_attr])
        # print("\n")

    return train_score, test_score, cov_all_train, cov_all_test, s_attr_to_fp_fn_train, s_attr_to_fp_fn_test
def main():
    model = UNET(in_channels=3, out_channels=1).to(config.DEVICE)
    BCE = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.LEARNING_RATE)
    train_loader, val_loader = get_loaders(
        train_dir=config.TRAIN_IMG_DIR,
        train_mask_dir=config.TRAIN_MASK_DIR,
        val_dir=config.VAL_IMG_DIR,
        val_mask_dir=config.VAL_MASK_DIR,
        batch_size=config.BATCH_SIZE,
        train_transform=config.train_transform,
        val_transform=config.val_transform,
        num_workers=config.NUM_WORKERS,
        pin_memory=config.PIN_MEMORY,
    )

    if config.LOAD_MODEL:
        load_checkpoint(torch.load(config.CHECKPOINT_PTH), model)
        check_accuracy(val_loader, model)

    scaler = torch.cuda.amp.GradScaler()
    for epoch in range(config.NUM_EPOCHS):
        train_fn(train_loader, model, optimizer, BCE, scaler, val_loader)

        # save model
        if config.SAVE_MODEL:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
            }
            save_checkpoint(checkpoint)

        # check accuracy
        check_accuracy(val_loader, model)

        # print some examples
        save_predictions_as_imgs(val_loader, model, folder=config.SAVE_IMAGES)
def train_test_classifier():
    w = ut.train_model(x_train, y_train, x_control_train, loss_function,
                       apply_fairness_constraints, apply_accuracy_constraint,
                       sep_constraint, sensitive_attrs,
                       sensitive_attrs_to_cov_thresh, gamma)
    train_score, test_score, correct_answers_train, correct_answers_test = ut.check_accuracy(
        w, x_train, y_train, x_test, y_test, None, None)

    distances_boundary_test = np.dot(x_test, w)
    distances_boundary_train = np.dot(x_train, w)
    prob_test = [sigmoid(x) for x in distances_boundary_test]
    prob_train = [sigmoid(x) for x in distances_boundary_train]
    all_class_labels_assigned_test = np.sign(distances_boundary_test)
    correlation_dict_test = ut.get_correlations(
        None, None, all_class_labels_assigned_test, x_control_test, sensitive_attrs)
    cov_dict_test = ut.print_covariance_sensitive_attrs(
        None, x_test, distances_boundary_test, x_control_test, sensitive_attrs)
    p_rule = ut.print_classifier_fairness_stats(
        [test_score], [correlation_dict_test], [cov_dict_test], sensitive_attrs[0])

    # return w, p_rule, test_score
    return prob_train, prob_test
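# The function above relies on a sigmoid() helper to turn signed distances from the
# decision boundary into probabilities. A one-line sketch of the assumed definition
# (the _sketch name is illustrative):
import numpy as np

def sigmoid_sketch(z):
    return 1.0 / (1.0 + np.exp(-z))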
def main():
    model = EfficientNet.from_pretrained("efficientnet-b7")
    model._fc = nn.Linear(2560, 1)
    train_dataset = CatDog(root="data/train/", transform=config.basic_transform)
    test_dataset = CatDog(root="data/test/", transform=config.basic_transform)
    train_loader = DataLoader(
        train_dataset,
        shuffle=True,
        batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS,
        pin_memory=True,
    )
    test_loader = DataLoader(
        test_dataset,
        shuffle=False,
        batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS,
    )
    model = model.to(config.DEVICE)
    scaler = torch.cuda.amp.GradScaler()
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(
        model.parameters(), lr=config.LEARNING_RATE, weight_decay=config.WEIGHT_DECAY
    )

    if config.LOAD_MODEL and config.CHECKPOINT_FILE in os.listdir():
        load_checkpoint(torch.load(config.CHECKPOINT_FILE), model)

    for epoch in range(config.NUM_EPOCHS):
        train_one_epoch(train_loader, model, loss_fn, optimizer, scaler)
        check_accuracy(train_loader, model, loss_fn)

        if config.SAVE_MODEL:
            checkpoint = {"state_dict": model.state_dict(), "optimizer": optimizer.state_dict()}
            save_checkpoint(checkpoint, filename=config.CHECKPOINT_FILE)

    save_feature_vectors(model, train_loader, output_size=(1, 1), file="train_b7")
    save_feature_vectors(model, test_loader, output_size=(1, 1), file="test_b7")
def apply_TCA(trainX, trainY, testX, testY, window, source_pos, target_pos):
    ######################################################
    ### Transfer Component Analysis (Pan et al, 2009) ###
    ######################################################

    # Decision Tree
    print("\n Transfer Component Analysis (Pan et al, 2009)")
    classifier = TransferComponentClassifier(loss="dtree", num_components=1)
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_DT_TCA, acc_DT_TCA_INFO = check_accuracy(testY, pred_naive)

    # Logistic Regression
    classifier = TransferComponentClassifier(loss="logistic", num_components=100)
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_LR_TCA, acc_LR_TCA_INFO = check_accuracy(testY, pred_naive)

    # Naive Bayes Bernoulli
    classifier = TransferComponentClassifier(loss="berno", num_components=1,
                                             l2=100, bandwidth=10, order=100.0, mu=100.0)
    classifier.fit(trainX, trainY, testX)
    pred_naive = classifier.predict(testX)
    acc_NB_TCA, acc_NB_TCA_INFO = check_accuracy(testY, pred_naive)

    return pd.DataFrame([{
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,
        'acc_LR_TCA': acc_LR_TCA, 'acc_LR_TCA_INFO': str(acc_LR_TCA_INFO),
        'acc_DT_TCA': acc_DT_TCA, 'acc_DT_TCA_INFO': str(acc_DT_TCA_INFO),
        'acc_NB_TCA': acc_NB_TCA, 'acc_NB_TCA_INFO': str(acc_NB_TCA_INFO),
    }])
def main():
    train_transform = tr.Compose([
        tr.Resize((160, 240)),
        tr.ToTensor(),
        tr.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    model = UNET(in_channels=3, out_channels=3).to(DEVICE)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    train_loader, val_loader = get_loaders(
        TRAIN_IMG_DIR,
        TRAIN_BLUR_DIR,
        BATCH_SIZE,
        train_transform=train_transform,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY,
    )

    if LOAD_MODEL:
        load_checkpoint(torch.load("my_checkpoint.pth.tar"), model)

    check_accuracy(val_loader, model, device=DEVICE)
    scaler = torch.cuda.amp.GradScaler()

    for epoch in range(NUM_EPOCHS):
        train_model(train_loader, model, optimizer, loss_fn, scaler)

        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        save_checkpoint(checkpoint)
        check_accuracy(val_loader, model, device=DEVICE)
def apply_ENSEMBLE(trainX, trainY, testX, testY, window, source_pos, target_pos):
    classifier_SA_DT = SubspaceAlignedClassifier(loss="dtree")
    classifier_SA_LR = SubspaceAlignedClassifier(loss="logistic")
    classifier_SA_NB = SubspaceAlignedClassifier(loss="berno")
    classifier_TCA_DT = TransferComponentClassifier(loss="dtree")
    classifier_TCA_LR = TransferComponentClassifier(loss="logistic")
    classifier_TCA_NB = TransferComponentClassifier(loss="berno")
    classifier_NN_DT = ImportanceWeightedClassifier(iwe='nn', loss="dtree")
    classifier_NN_LR = ImportanceWeightedClassifier(iwe='nn', loss="logistic")
    classifier_NN_NB = ImportanceWeightedClassifier(iwe='nn', loss="berno")
    classifier_KMM_DT = ImportanceWeightedClassifier(iwe='kmm', loss="dtree")
    classifier_KMM_LR = ImportanceWeightedClassifier(iwe='kmm', loss="logistic")
    classifier_KMM_NB = ImportanceWeightedClassifier(iwe='kmm', loss="berno")

    eclf = EnsembleClassifier(clfs=[
        # classifier_SA_DT,
        # classifier_SA_LR,
        # classifier_SA_NB,
        # classifier_TCA_DT,
        # classifier_TCA_LR,
        classifier_TCA_NB,
        classifier_NN_DT,
        # classifier_NN_LR,
        # classifier_NN_NB,
        classifier_KMM_DT,
        classifier_KMM_LR,
        # classifier_KMM_NB
    ])
    eclf.fit(trainX, trainY, testX)
    pred = eclf.predict(testX)
    acc_ENSEMBLE, acc_ENSEMBLE_INFO = check_accuracy(testY, pred)

    return pd.DataFrame([{
        'window': window,
        'source_position': source_pos,
        'target_position': target_pos,
        'acc_ENSEMBLE': acc_ENSEMBLE,
        'acc_ENSEMBLE_INFO': acc_ENSEMBLE_INFO,
    }])
def get_clf_stats(w, x_train, y_train, x_control_train, x_test, y_test, x_control_test, sensitive_attrs):
    assert len(sensitive_attrs) == 1  # ensure that we have just one sensitive attribute
    s_attr = sensitive_attrs[0]  # for now, lets compute the accuracy for just one sensitive attr

    # compute distance from boundary
    distances_boundary_train = get_distance_boundary(w, x_train, x_control_train[s_attr])
    distances_boundary_test = get_distance_boundary(w, x_test, x_control_test[s_attr])

    # compute the class labels
    all_class_labels_assigned_train = np.sign(distances_boundary_train)
    all_class_labels_assigned_test = np.sign(distances_boundary_test)
    train_score, test_score, correct_answers_train, correct_answers_test = ut.check_accuracy(
        None, x_train, y_train, x_test, y_test,
        all_class_labels_assigned_train, all_class_labels_assigned_test)

    cov_all_train = {}
    cov_all_test = {}

    for s_attr in sensitive_attrs:
        print_stats = False  # we arent printing the stats for the train set to avoid clutter
        # uncomment these lines to print stats for the train fold
        # print("*** Train ***")
        # print("Accuracy: %0.3f" % (train_score))
        # print_stats = True
        s_attr_to_fp_fn_train = get_fpr_fnr_sensitive_features(
            y_train, all_class_labels_assigned_train, x_control_train, sensitive_attrs, print_stats)
        cov_all_train[s_attr] = get_sensitive_attr_constraint_fpr_fnr_cov(
            None, x_train, y_train, distances_boundary_train, x_control_train[s_attr])

        print("\n")
        print("Accuracy: %0.3f" % (test_score))
        print_stats = True  # only print stats for the test fold
        s_attr_to_fp_fn_test = get_fpr_fnr_sensitive_features(
            y_test, all_class_labels_assigned_test, x_control_test, sensitive_attrs, print_stats)
        cov_all_test[s_attr] = get_sensitive_attr_constraint_fpr_fnr_cov(
            None, x_test, y_test, distances_boundary_test, x_control_test[s_attr])
        print("\n")

    return train_score, test_score, cov_all_train, cov_all_test, s_attr_to_fp_fn_train, s_attr_to_fp_fn_test
def main():
    parser = ArgumentParser(description='Validate model on .png files')
    parser.add_argument('-m', type=str, required=False, default=None, help='Model to validate')
    parser.add_argument('input_dir', type=str, help='Input directory')
    parser.add_argument('target_dir', type=str, help='Output directory')
    args = parser.parse_args()

    device = get_device()
    model, _, _ = load_model(load_config().model if args.m is None else args.m, device=device)
    loss_f = BCELoss()

    count, loss_sum, acc_sum = 0, 0, 0
    with torch.no_grad():
        for fn in listdir(args.input_dir):
            input_path = join(args.input_dir, fn)
            target_path = join(args.target_dir, fn)
            if fn.endswith('.png') and isfile(input_path) and isfile(target_path):
                print('Validating %s with target %s' % (input_path, target_path))
                data = image_to_tensor(imread(input_path), device=device)
                target = image_to_probs(imread(target_path), device=device)
                data = model(data).squeeze(0)
                loss = loss_f(data, target).item()
                acc = check_accuracy(data, target)
                print('Loss %f, acc %s' % (loss, acc_to_str(acc)))
                count += 1
                acc_sum += acc
                loss_sum += loss

    print('\nSUMMARY\nLoss %f, acc %s' % (loss_sum / count, acc_to_str(acc_sum)))
    print(acc_to_details(acc_sum))
def main(train_dir, val_dir, checkpoint_dir, batch_size, image_size=512, num_epochs=10,
         checkpoint_name=None, num_workers=1, pin_memory=True, log_dir="logs",
         model_name=None, train_csv=None, val_csv=None):
    # declare datasets
    train_ds = DataFolder(root_dir=train_dir, transform=transform(image_size, is_training=True),
                          csv_path=train_csv)
    val_ds = DataFolder(root_dir=val_dir, transform=transform(image_size, is_training=False),
                        csv_path=val_csv)
    train_loader = DataLoader(train_ds, batch_size=batch_size, num_workers=num_workers,
                              pin_memory=pin_memory, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=batch_size, num_workers=num_workers,
                            pin_memory=pin_memory, shuffle=True)

    # init model
    model = MainModel(128, model_name)

    # configure parameters
    loss_fn = nn.CrossEntropyLoss()
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    scaler = torch.cuda.amp.GradScaler()

    # checkpoint = {'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
    # save_checkpoint(checkpoint, os.path.join(checkpoint_dir, "checkpoint_initialize.pth.tar"))
    # return

    if checkpoint_name:
        ckp_path = os.path.join(checkpoint_dir, checkpoint_name)
        load_checkpoint(torch.load(ckp_path), model, optimizer)
        check_accuracy(val_loader, model, device)

    # training
    for epoch in range(num_epochs):
        train_fn(train_loader, model, optimizer, loss_fn, scaler, device, epoch, log_dir=log_dir)
        check_accuracy(val_loader, model, device)
        checkpoint = {
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        save_checkpoint(checkpoint, os.path.join(checkpoint_dir, f"checkpoint_{epoch}.pth.tar"))
x.resize_(x.size()[0], 1, x.size()[1])
x, y = x.float(), y.long()
x, y = x.to(device), y.to(device)

# loss and predictions
scores = model(x)
loss = loss_fn(scores, y)
writer.add_scalar('loss', loss.item())

# print and save loss per 'print_every' times
if (t + 1) % opt.print_every == 0:
    print('t = %d, loss = %.4f' % (t + 1, loss.item()))

# parameters update
optimizer.zero_grad()
loss.backward()
optimizer.step()

# save epoch loss and acc to train or val history
train_acc, _ = check_accuracy(model, tr_loader, device)
val_acc, _ = check_accuracy(model, val_loader, device)

# write acc and weights to tensorboard
writer.add_scalars('acc', {
    'train_acc': train_acc,
    'val_acc': val_acc
}, epoch)
for name, param in model.named_parameters():
    writer.add_histogram(name, param.clone().cpu().data.numpy(), epoch)

# save the best model
if val_acc > best_acc:
    best_acc = val_acc
    best_model_wts = copy.deepcopy(model.state_dict())

t1 = time.time()
print(t1 - t0)  # print results
def main():
    # TODO: Might be worth trying the normalization from assignment 2
    train_transform = A.Compose([
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Rotate(limit=35, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ])
    val_transforms = A.Compose([
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ])

    model = UNET(in_channels=3, out_channels=1).to(DEVICE)
    """
    We're using BCEWithLogitsLoss because we're not applying a sigmoid on the
    final output layer. If we wanted several output channels, we'd change the
    loss_fn to a cross-entropy loss instead.
    """
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    train_loader, val_loader = get_loaders(
        TRAIN_IMG_DIR,
        TRAIN_MASK_DIR,
        VAL_IMG_DIR,
        VAL_MASK_DIR,
        BATCH_SIZE,
        train_transform,
        val_transforms,
        NUM_WORKERS,
        PIN_MEMORY,
    )

    if LOAD_MODEL:
        load_checkpoint(torch.load("my_checkpoint.pth.tar"), model)

    scaler = torch.cuda.amp.GradScaler()  # scales the gradients to avoid underflow; requires a GPU

    for epoch in range(NUM_EPOCHS):
        train_fn(train_loader, model, optimizer, loss_fn, scaler)

        # save model
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        save_checkpoint(checkpoint)

        # check accuracy
        check_accuracy(val_loader, model, device=DEVICE)

        # print some examples to a folder
        save_predictions_as_imgs(val_loader, model, folder="saved_images/", device=DEVICE)
trained_models = []

# testing only
for file_name in files_out:
    f = deepcopy(file_name)
    f += ".pkl"
    model = model_fromPickle(f)
    if model is not None:
        trained_models.append(model)
        # testing only
        f = deepcopy(file_name)
        f += ".csv"
        output_files(model, ds1Test, f)

########## testing only #####################################
for index, model in enumerate(trained_models):
    accuracy = check_accuracy(model, set1_xTest, set1_yTest)
    print("Model {}: {}".format(index, accuracy))
##############################################################

# create dict to store some meta-data on params and performance after tuning
# in case we want access later for summary statistics
best = {'naive bayes': {}, 'decision trees': {}, 'random forests': {}}

# Get classifier and meta data on Naive Bayes for ds1
bern, best['naive bayes'] = bernoulli(set1_xTrain, set1_yTrain, set1_xTest, set1_yTest,
                                      data_set1=True, combined=combined_ds1)
output_files(
def main():
    args = parseArgs()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    trainloader, testloader, num_classes = Dataset(args.dataset_name, args.batch_size)
    model = selectModel(args.model)
    model = model.to(device)

    weights = [w for name, w in model.named_parameters() if "weight" in name]
    num_features = sum([w.numel() for w in weights])
    num_samples = len(trainloader) * trainloader.batch_size

    criterion = nn.CrossEntropyLoss()

    if args.optimizer == 'obproxsg':
        optimizer = OBProxSG(model.parameters(), lr=args.learning_rate, lambda_=args.lambda_,
                             epochSize=len(trainloader), Np=5, No=5)
    elif args.optimizer == 'obproxsg_plus':
        optimizer = OBProxSG(model.parameters(), lr=args.learning_rate, lambda_=args.lambda_,
                             epochSize=len(trainloader), Np=int(args.max_epoch / 10))
    elif args.optimizer == 'proxsg':
        optimizer = ProxSG(model.parameters(), lr=args.learning_rate, lambda_=args.lambda_)
    elif args.optimizer == 'proxsvrg':
        optimizer = ProxSVRG(model.parameters(), lr=args.learning_rate, lambda_=args.lambda_,
                             epochSize=len(trainloader))
    elif args.optimizer == 'rda':
        optimizer = RDA(model.parameters(), lambda_=args.lambda_, gamma=20,
                        epochSize=len(trainloader))

    if args.optimizer != 'rda':
        scheduler = StepLR(optimizer, step_size=60, gamma=0.1)

    os.makedirs('results', exist_ok=True)
    setting = '%s_%s_%s_%E' % (args.optimizer, args.model, args.dataset_name, args.lambda_)
    csvname = os.path.join('results', setting + '.csv')
    print('Results are saving to the CSV file: %s.' % csvname)

    csvfile = open(csvname, 'w', newline='')
    fieldnames = ['epoch', 'F_value', 'f_value', 'norm_l1_x', 'density', 'validation_acc', 'train_time']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter=",")
    writer.writeheader()

    alg_start_time = time.time()

    epoch = 0
    while True:
        epoch_start_time = time.time()
        if epoch >= args.max_epoch:
            break

        if args.optimizer == 'proxsvrg':
            optimizer.zero_grad()
            for index, (X, y) in enumerate(trainloader):
                X = X.to(device)
                y = y.to(device)
                y_pred = model.forward(X)
                f1 = criterion(y_pred, y)
                f1.backward()
            optimizer.init_epoch()

            for index, (X, y) in enumerate(trainloader):
                X = X.to(device)
                y = y.to(device)

                # calculate grad_f_i
                optimizer.set_weights_as_xs()
                y_pred = model.forward(X)
                f = criterion(y_pred, y)
                optimizer.zero_grad()
                f.backward()
                optimizer.save_grad_f()

                # calculate grad_f_hat_i
                optimizer.set_weights_as_hat_x()
                y_pred = model.forward(X)
                f = criterion(y_pred, y)
                optimizer.zero_grad()
                f.backward()
                optimizer.save_grad_f_hat()

                optimizer.update_xs()
                optimizer.step()
        else:
            for _, (X, y) in enumerate(trainloader):
                X = X.to(device)
                y = y.to(device)
                y_pred = model.forward(X)
                f = criterion(y_pred, y)
                optimizer.zero_grad()
                f.backward()
                optimizer.step()

        if args.optimizer != 'rda':
            scheduler.step()

        epoch += 1
        train_time = time.time() - epoch_start_time

        F, f, norm_l1_x = compute_F(trainloader, model, weights, criterion, args.lambda_)
        density = sum([torch.sum(w != 0).item() for w in weights]) / num_features
        accuracy = check_accuracy(model, testloader)
        writer.writerow({'epoch': epoch, 'F_value': F, 'f_value': f, 'norm_l1_x': norm_l1_x,
                         'density': density, 'validation_acc': accuracy, 'train_time': train_time})
        csvfile.flush()
        print("Epoch {}: {:2f}seconds ...".format(epoch, train_time))

    alg_time = time.time() - alg_start_time
    writer.writerow({'train_time': alg_time / epoch})

    os.makedirs('checkpoints', exist_ok=True)
    torch.save(model, os.path.join('checkpoints', setting + '.pt'))
    csvfile.close()
    type=int, default=3, help='No of epochs to train the model')
parser.add_argument('--gpu', default='gpu', help='Determine GPU vs CPU for the neural network')
args = parser.parse_args()

data_dir = args.data_dir
save_dir = args.save_dir
arch = args.arch
learning_rate = args.learning_rate
hidden_units = args.hidden_units
epochs = args.epochs
gpu = args.gpu

if gpu == 'gpu':
    is_gpu = True
else:
    is_gpu = False

image_datasets, dataloaders = load_data(data_dir)
model = build_model(arch, hidden_units, is_gpu)
model = train_model(model, dataloaders['training'], dataloaders['validation'],
                    epochs, learning_rate, is_gpu)
accuracy = check_accuracy(model, dataloaders['test'], is_gpu)
print("Accuracy: {:.2f}%".format(accuracy * 100))

save_checkpoint(model, hidden_units, epochs, image_datasets['training'], save_dir)
y = y.to(config.DEVICE).view(-1).float()

# forward
scores = model(x).view(-1)
loss = loss_fn(scores, y)
losses.append(loss.item())

# backward
optimizer.zero_grad()
loss.backward()

# gradient descent or adam step
optimizer.step()

print(f"Loss: {sum(losses)/len(losses)}")

if config.SAVE_MODEL:
    checkpoint = {
        "state_dict": model.state_dict(),
        "optimizer": optimizer.state_dict()
    }
    save_checkpoint(checkpoint, filename="linear.pth.tar")

preds, labels = check_accuracy(loader_val, model)
print(cohen_kappa_score(labels, preds, weights="quadratic"))
preds, labels = check_accuracy(loader, model)
print(cohen_kappa_score(labels, preds, weights="quadratic"))
make_prediction(model, loader_test, "test_preds.csv")