def feature_importance_remove(X_train, Y_train, X_test, Y_test):
    from train_utils import build_model, get_dataset, fit_model

    models = []
    n_features = X_train.shape[-1]
    feature_nb_train = []
    feature_nb_test = []
    for i in range(n_features):
        print(i)
        # Leave-one-feature-out: drop column i and retrain on the remaining features.
        ix = np.delete(np.arange(n_features), i)
        model = build_model(input_shape=(n_features - 1,), num_layers=3)
        batch_size = 30000
        adult_train = get_dataset(X_train[:, ix], Y_train, batch_size=batch_size)
        adult_test = get_dataset(X_test[:, ix], Y_test)
        EPOCHS = 1000
        history = fit_model(model, X_train[:, ix], Y_train, EPOCHS,
                            batch_size=batch_size, verbose=0)
        # file_name = "../temp_store/models/adult-{}.h5".format(i)
        # model.load_weights(file_name)
        models.append(model)
        loss, acc = model.evaluate(adult_train)
        feature_nb_train.append(acc)
        loss, acc = model.evaluate(adult_test)
        feature_nb_test.append(acc)
    return models, feature_nb_train, feature_nb_test
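# Hypothetical usage sketch (not part of the original script): it assumes the
# X_*/Y_* arrays are the same numpy arrays used for the full model elsewhere in
# this repo (e.g. produced by prep_data()).
#
#     models, acc_tr, acc_ts = feature_importance_remove(X_train, Y_train,
#                                                        X_test, Y_test)
#     # acc_ts[i] is the test accuracy of the model trained *without* feature i;
#     # a large drop relative to the full model suggests feature i matters.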
def main():
    unique_chars_amount = config['vocab_size']
    embedding_dim = config['embedding']
    rnn_units = config['units']
    n_layers = config['layers']

    # Build the model and load the latest checkpoint weights
    print("Starting to build the model.")
    model = build_model(unique_chars_amount, embedding_dim, rnn_units, 1, n_layers)
    model.load_weights(tf.train.latest_checkpoint(args.path))
    model.build(tf.TensorShape([1, None]))
    print("Model is ready.")

    # If we're looping, keep asking for a prime and generating text
    while args.loop:
        prime = input("\nEnter prime (or press Enter with no text to exit): ").replace(
            "\\n", "\n")
        if not prime:
            break
        print("Starting to generate text.\n")
        main_generate_text(model, prime)
    # This `else` belongs to the `while`: it runs only when the loop exits without
    # `break`, i.e. when args.loop is false, so the text is generated once from args.prime.
    else:
        print("Generating text.\n")
        main_generate_text(model, args.prime)
def main(dirname, dirname2):
    x_train, y_train, x_val, y_val = load_data(dirname)
    x_test, y_test = load_data_testing(dirname2)

    # Optional shape checks:
    # print('TRAINING DATA X-', x_train.shape)
    # print('TRAINING DATA Y-', y_train.shape)
    # print('VALIDATION DATA X-', x_val.shape)
    # print('VALIDATION DATA Y-', y_val.shape)
    # print('TEST DATA X-', x_test.shape)
    # print('TEST DATA Y-', y_test.shape)

    model = build_model(y_train.shape[1])

    print('Training stage')
    print('==============')
    model.fit(x_train, y_train, epochs=200, batch_size=16,
              validation_data=(x_val, y_val))

    score, acc = model.evaluate(x_test, y_test, batch_size=16, verbose=0)
    print('Test performance: accuracy={0}, loss={1}'.format(acc, score))

    model.save('model.h5')
    model.summary()
def eval(config: dict, model_path='checkpoints/model_epoch30.pth'):
    ssd = build_model(config, is_test=True)
    ssd.load_state_dict(torch.load(model_path))
    ssd.train(False)
    net = Predictor(ssd)

    data_transform = transforms.Compose([
        transforms.ToRelativeBoxes(),
        transforms.Resize(config['image_size']),
        transforms.Scale(),
        transforms.ToTensor()
    ])
    # `priors` is expected to be defined at module level (cf. generate_priors in train_ssd)
    target_transform = MatchPrior(priors, config)

    val_set = CityscapesDataset(config, 'dataset/val', None, data_transform,
                                target_transform, True)

    # Run a single validation image through the predictor and draw the boxes
    test_image = val_set.get_image(0)
    boxes, labels, conf, _, _ = net.predict(test_image)

    drawer = Draw(test_image)
    for i in range(boxes.shape[0]):
        top_left = tuple(boxes[i][:2].numpy().flatten())
        bottom_right = tuple(boxes[i][2:].numpy().flatten())
        drawer.rectangle((top_left, bottom_right))
    test_image.save("predict.jpg")
def eval_disparity(config: dict, model_path='checkpoints/model_epoch30.pth'):
    ssd = build_model(config, is_test=True)
    ssd.load_state_dict(torch.load(model_path))
    ssd.train(False)
    net = Predictor(ssd)

    data_transform = transforms.Compose([
        transforms.ToRelativeBoxes(),
        transforms.Resize(config['image_size']),
        transforms.Scale(),
        transforms.ToTensor()
    ])
    target_transform = MatchPrior(priors, config)

    val_set = CityscapesDataset(config, 'dataset/val', None, data_transform,
                                target_transform, True)

    errors = []
    for i, _ in enumerate(val_set):
        image = val_set.get_image(i)
        gt_disparity = val_set.get_disparity(i)
        _, _, _, _, disparity = net.predict(image)
        error = ((gt_disparity - disparity) ** 2).flatten()
        errors.append(error)
    errors = torch.cat(errors)
    print("RMSE: {}".format(math.sqrt(errors.mean().item())))
def eval(config: dict, model_path='checkpoints/model_epoch40.pth'):
    ssd = build_model(config, is_test=True)
    ssd.load_state_dict(torch.load(model_path))
    ssd.train(False)
    net = Predictor(ssd)

    data_transform = transforms.Compose([
        transforms.ToRelativeBoxes(),
        transforms.Resize(config['image_size']),
        transforms.Scale(),
        transforms.ToTensor()
    ])
    target_transform = MatchPrior(priors, config)

    val_set = CityscapesDataset(config, 'dataset/val', None, data_transform,
                                target_transform, True)

    for i, _ in enumerate(val_set):
        if i % 10 == 0:
            print("Image {}".format(i))
        image = val_set.get_image(i)
        probs, boxes, disparity = net.predict(image)
        labels = torch.argmax(probs, dim=probs.dim() - 1)

        # Per-class non-maximum suppression over the predicted boxes
        chosen_indices = []
        for class_index in range(1, config['num_classes'] + 1):
            class_mask = labels == class_index
            # If there's no prediction for this class, skip it
            if class_mask.long().sum() <= 0:
                continue
            class_probabilities = probs[class_mask, class_index]
            class_boxes = boxes[class_mask]
            class_indices = nms(class_boxes, class_probabilities, 0.5)
            chosen_indices.append(class_indices)
        chosen_indices = torch.cat(chosen_indices)

        probs = probs[chosen_indices]
        boxes = boxes[chosen_indices]

        box_drawer = Draw(image)
        for box in boxes:
            top_left = tuple(box[:2].numpy().flatten())
            bottom_right = tuple(box[2:].numpy().flatten())
            box_drawer.rectangle((top_left, bottom_right))
        image.save('result.jpg')
        # TODO change to all-image evaluation
        break
def main(dirname):
    x_train, y_train, x_test, y_test = load_data(dirname)
    print(y_train.shape)

    model = build_model(y_train.shape[1])
    print(model.summary())

    print('Training stage')
    print('==============')
    history = model.fit(x_train, y_train, epochs=250, batch_size=16,
                        validation_data=(x_test, y_test))

    score, acc = model.evaluate(x_test, y_test, batch_size=16, verbose=0)
    print('Test performance: accuracy={0}, loss={1}'.format(acc, score))

    model.save('model.h5')
def get_original_models(seed, num_layers=3):
    model_orig_list = []
    for i, f in enumerate(dataset_fs):
        Xtr, Xts, ytr, yts, Ztr, Zts = f(0, remove_z=False)
        X_test, X_train, Y_test, Y_train = prep_data(Xtr, Xts, ytr, yts)

        optimizer = tf.keras.optimizers.Adam(lr=0.01)
        n_features = X_train.shape[-1]
        batch_size = 30000
        # input_shape passed as a tuple, matching the other build_model call sites
        model_full = build_model(input_shape=(n_features,), num_layers=num_layers,
                                 optimizer=optimizer, seed=seed)
        EPOCHS = 1000
        history = fit_model(model_full, X_train, Y_train, EPOCHS,
                            batch_size=batch_size, verbose=0)
        model_orig_list.append(model_full)
    return model_orig_list
def main(dirname):
    x_train, y_train, x_test, y_test = load_data(dirname)
    num_val_samples = x_train.shape[0] // 5

    model = build_model(y_train.shape[1])

    print('Training stage')
    print('==============')
    history = model.fit(x_train, y_train, epochs=100, batch_size=32,
                        validation_data=(x_test, y_test))

    score, acc = model.evaluate(x_test, y_test, batch_size=16, verbose=0)
    print('Test performance: accuracy={0}, loss={1}'.format(acc, score))

    model.save('HPRmodelv4.h5')
def main():
    # Get command line arguments
    args = ParseCommandLine()

    # Print data directory
    print("Data directory: ", args.data_directory)

    # Print device used
    use_gpu = torch.cuda.is_available() and args.gpu
    if use_gpu:
        print("Training on GPU.")
    else:
        print("Training on CPU.")

    # Print architecture and hyperparameters
    print("Architecture: {}".format(args.arch))
    print("Learning rate: {}".format(args.learning_rate))
    print("Hidden units: {}".format(args.hidden_units))
    print("Epochs: {}".format(args.epochs))

    # Print the save_dir option
    if args.save_dir:
        print("Checkpoint save directory: {}".format(args.save_dir))

    # --------------------------------------------------------------------
    # Get data loaders
    train_loader, valid_loader, test_loader, class_to_idx = train_utils.load_data(
        args.data_directory)

    # --------------------------------------------------------------------
    # Build the model
    model = train_utils.build_model(args.arch, args.hidden_units)
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), lr=args.learning_rate)
    model.class_to_idx = class_to_idx

    # --------------------------------------------------------------------
    # Train the model
    train_utils.train_model(model, args.epochs, args.learning_rate, use_gpu,
                            criterion, optimizer, train_loader, valid_loader)

    # --------------------------------------------------------------------
    # Validate on the test set
    test_loss, accuracy = train_utils.validate_model(model, criterion, test_loader)
    print("Validation on the test set")
    print(f"Test accuracy: {accuracy:.2f}%")

    # --------------------------------------------------------------------
    # Save the checkpoint
    if args.save_dir:
        save_checkpoint(args.arch, args.learning_rate, args.hidden_units,
                        args.epochs, model, optimizer, args.save_dir)
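# Hypothetical command-line invocation (the exact flag names depend on
# ParseCommandLine(), which is not shown here; the attribute names above only
# suggest them):
#
#     python train.py data_directory/ --arch vgg16 --learning_rate 0.001 \
#         --hidden_units 512 --epochs 5 --gpu --save_dir checkpoints/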
def feature_importance_nulify(X_train, Y_train, X_test, Y_test,
                              feature_idx=None, seed=49, num_layers=3):
    from train_utils import build_model, get_dataset, fit_model
    from datasets import nulify_feature

    models = []
    n_features = X_train.shape[-1]
    if feature_idx is None:
        feature_idx = range(n_features)
    feature_nb_train = []
    feature_nb_test = []
    for i in feature_idx:
        print(i)
        # ix = np.delete(np.arange(n_features), i)
        model = build_model(input_shape=(n_features,), num_layers=num_layers, seed=seed)
        batch_size = 10000

        # Nullify feature i in the training data and retrain
        x, y = nulify_feature(X_train, Y_train, i)
        adult_train = get_dataset(x, y, batch_size=batch_size)
        EPOCHS = 1000
        history = fit_model(model, x, y, EPOCHS=EPOCHS,
                            batch_size=batch_size, verbose=0)

        # Apply the same transformation to the test data
        x, y = nulify_feature(X_test, Y_test, i)
        adult_test = get_dataset(x, y)
        # file_name = "../temp_store/models/adult-{}.h5".format(i)
        # model.load_weights(file_name)
        models.append(model)

        loss, acc = model.evaluate(adult_train)
        feature_nb_train.append(acc)
        loss, acc = model.evaluate(adult_test)
        feature_nb_test.append(acc)
    return models, feature_nb_train, feature_nb_test
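# Note: unlike feature_importance_remove() above, this variant keeps the input
# dimensionality fixed and relies on datasets.nulify_feature() (which, by its
# name, blanks out the selected column), so the two scores are related but not
# interchangeable. Hypothetical usage sketch, mirroring the call above:
#
#     models, acc_tr, acc_ts = feature_importance_nulify(X_train, Y_train,
#                                                        X_test, Y_test,
#                                                        feature_idx=[0, 3])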
def evaluate_pertrubed_models(X_train, Y_train, X_test, Y_test, e_alpha=0.25,
                              feature_set=None, attack=no_attack,
                              train_robust=False, model_orig=None,
                              EPOCHS=1000, R_EPOCHS=100, batch_size=30000,
                              seed=49, num_layers=3):
    n_features = X_train.shape[-1]
    adult_train = get_dataset(X_train, Y_train, batch_size=batch_size)
    adult_test = get_dataset(X_test, Y_test)

    optimizer = tf.keras.optimizers.Adam(lr=0.01)
    model_full = build_model(input_shape=(n_features,), num_layers=num_layers,
                             optimizer=optimizer, seed=seed)

    if train_robust:
        # TODO
        optimizer = tf.keras.optimizers.Adam(lr=0.01)
        for t in range(R_EPOCHS):
            adv_err, adv_loss = epoch_adversarial(adult_train, model_full, attack,
                                                  epsilon=0.25, alpha=0.08,
                                                  num_iter=30, optimizer=optimizer)
    else:
        if model_orig is None:
            print("WARNING")
            print("TRAINING MODEL FROM SCRATCH")
            history = fit_model(model_full, X_train, Y_train, EPOCHS,
                                batch_size=batch_size, verbose=0)
        else:
            model_full.set_weights(model_orig.get_weights())

    models_p = []
    adv_lis = []
    val_names = ("train_err", "test_err", "adv_err", "adv_err_fgsm",
                 "e_loss", "e_loss_train")
    print(*("{}".format(i) for i in val_names), sep="\t")

    if feature_set is None:
        feature_set = range(n_features)
    for i in feature_set:
        print(i)
        z_idx = i
        # Train a per-feature model with the explanation objective for the
        # chosen sensitive feature.
        model_explain = clone_model(model_full)
        optimizer = tf.keras.optimizers.Adam(lr=0.01)
        for t in range(50):
            train_err, train_loss = epoch_explanation(
                adult_train, model_explain, attack,
                sensitive_feature_id=z_idx, e_alpha=e_alpha,
                epsilon=0.25, alpha=0.08, num_iter=30, optimizer=optimizer)
        models_p.append(model_explain)

        adv_err, adv_err_f, e_loss, e_loss_train, test_err = epoch_eval(
            adult_train, adult_test, model_explain, z_idx)
        r = (train_err, test_err, adv_err, adv_err_f, e_loss, e_loss_train)
        adv_lis.append(r)
        print(*("{:.6f}".format(i) for i in r), sep="\t")
    return models_p, adv_lis
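# Hypothetical usage sketch: passing a pre-trained original model means
# model_full is initialized from its weights instead of being retrained from
# scratch (model_orig_list and the data arrays are assumed to come from
# get_original_models() and prep_data() above; sensitive_feature_id is illustrative).
#
#     models_p, adv_lis = evaluate_pertrubed_models(
#         X_train, Y_train, X_test, Y_test,
#         e_alpha=0.25, feature_set=[sensitive_feature_id],
#         model_orig=model_orig_list[0], train_robust=False)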
def load_model_lists(model_lists_names, file_dir, num_layers, seed, alpha=15.0):
    """
    :param model_lists_names: names of the model lists to load
    :param file_dir: root directory of the saved weights
    :param num_layers: number of layers used when the models were built
    :param seed: random seed used when the models were built
    :param alpha: alpha value encoded in the saved file names
    :return: model_list[model][dataset]
    """
    from train_utils import build_model
    import tensorflow as tf

    file_dir = os.path.join(file_dir, "model_{}".format(num_layers),
                            "seed_{}".format(seed))
    model_dict = defaultdict(list)
    # for i, m_name in enumerate(model_lists_names):
    #     model_dict[m_name] = []
    f_nb_list = f_sensitive_list
    for i, f in enumerate(dataset_fs):
        optimizer = tf.keras.optimizers.Adam(lr=0.01)
        n_features = n_features_list[i]
        for j in range(len(f_nb_list[i])):
            sensitive_feature_id = f_nb_list[i][j]
            dataset_name = dataset_names[i]
            f_name = feature_name_dict[dataset_name][sensitive_feature_id]
            for k, model_list_name in enumerate(model_lists_names):
                file_name = "-".join(
                    map(lambda x: "_".join(x.split(" ")),
                        [dataset_name, model_list_name, f_name,
                         "alpha_{}".format(round(alpha, 0))]))
                # Distinguish between a list of models that vary by feature and
                # the original model (which does not vary by feature).
                if model_list_name != MODEL_ORIGINAL:
                    file_name += "-feature_" + str(j)
                    file_path = os.path.join(file_dir, file_name)
                    model = build_model(input_shape=(n_features,),
                                        num_layers=num_layers,
                                        optimizer=optimizer)
                    model.load_weights(file_path)
                    l = model_dict[model_list_name]
                    if not l:
                        l.extend([] for _ in range(len(dataset_fs)))
                    model_dict[model_list_name][i].append(model)
                    # print(file_path)
                else:
                    file_path = os.path.join(file_dir, file_name)
                    # The original model is only loaded once per dataset.
                    if j == 0:
                        model = build_model(input_shape=(n_features,),
                                            num_layers=num_layers,
                                            optimizer=optimizer)
                        model.load_weights(file_path)
                        model_dict[model_list_name].append(model)
    model_lists = [model_dict[m_name] for m_name in model_lists_names]
    return model_lists
def train_ssd(start_epoch: int, end_epoch: int, config: dict, use_gpu: bool = True,
              model_name='model', checkpoint_folder='checkpoints', log_folder='log',
              redirect_output=True):
    if not os.path.isdir(log_folder):
        os.makedirs(log_folder)

    priors = generate_priors(config)
    target_transform = MatchPrior(priors, config)

    train_transform = transforms.Compose([
        transforms.CustomJitter(),
        transforms.ToOpenCV(),
        transforms.RandomMirror()
    ])
    data_transform = transforms.Compose([
        transforms.ToRelativeBoxes(),
        transforms.Resize(config['image_size']),
        transforms.Scale(),
        transforms.ToTensor()
    ])

    train_set = CityscapesDataset(config, 'dataset/train', train_transform,
                                  data_transform, target_transform)
    train_loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=4)

    ssd = build_model(config)
    if use_gpu:
        ssd = ssd.cuda()
    ssd.train(True)

    # Resume from the previous epoch's checkpoint if it exists.
    checkpoint_path = os.path.join(checkpoint_folder,
                                   "{}_epoch{}.pth".format(model_name, start_epoch - 1))
    if os.path.isfile(checkpoint_path):
        ssd.load_state_dict(torch.load(checkpoint_path))

    criterion = MultiBoxLoss(0.5, 0, 3, config)
    disparity_criterion = BerHuLoss()

    ssd_params = [
        {'params': ssd.extractor.parameters()},
        {'params': ssd.extras.parameters()},
        {'params': itertools.chain(ssd.class_headers.parameters(),
                                   ssd.location_headers.parameters(),
                                   ssd.upsampling.parameters())}
    ]
    optimizer = SGD(ssd_params, lr=0.001, momentum=0.9, weight_decay=0.0005)
    lr_scheduler = CosineAnnealingLR(optimizer, 120, last_epoch=-1)

    optimizer_path = os.path.join(checkpoint_folder,
                                  "optimizer_epoch{}.pth".format(start_epoch - 1))
    if os.path.isfile(optimizer_path):
        print("Loading previous optimizer")
        optimizer.load_state_dict(torch.load(optimizer_path))

    for epoch in range(start_epoch, end_epoch):
        lr_scheduler.step()

        running_loss = 0.0
        running_regression_loss = 0.0
        running_classification_loss = 0.0
        running_disparity_loss = 0.0
        num_steps = len(train_loader)
        aps = torch.zeros((config['num_classes'],))
        running_map = 0

        if redirect_output:
            sys.stdout = open(os.path.join(log_folder,
                                           'train_epoch_{}.txt'.format(epoch)), 'w')

        for i, batch in enumerate(train_loader):
            images, gt_locations, labels, gt_disparity = batch
            if use_gpu:
                images = images.cuda()
                gt_locations = gt_locations.cuda()
                labels = labels.cuda()
                gt_disparity = gt_disparity.cuda()

            optimizer.zero_grad()
            confidences, locations, disparity = ssd(images)
            regression_loss, classification_loss = criterion.forward(
                confidences, locations, labels, gt_locations)
            disparity_loss = disparity_criterion.forward(disparity, gt_disparity)
            loss = regression_loss + classification_loss + 10 * disparity_loss
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            running_regression_loss += regression_loss.item()
            running_classification_loss += classification_loss.item()
            # .item() so we accumulate a float rather than a graph-holding tensor
            running_disparity_loss += disparity_loss.item()

            with torch.no_grad():
                boxes = convert_locations_to_boxes(locations, priors.cuda(),
                                                   config['variance'][0],
                                                   config['variance'][1])
                gt_boxes = convert_locations_to_boxes(gt_locations, priors.cuda(),
                                                      config['variance'][0],
                                                      config['variance'][1])
                batch_map, batch_ap = calculate_map(confidences, labels, boxes, gt_boxes)
                running_map += batch_map
                aps += batch_ap

        avg_loss = running_loss / num_steps
        avg_reg_loss = running_regression_loss / num_steps
        avg_class_loss = running_classification_loss / num_steps
        avg_disp_loss = running_disparity_loss / num_steps
        mean_ap = running_map / num_steps
        epoch_ap = aps / num_steps

        print("Epoch {}".format(epoch))
        print("Average Loss: {:.2f}".format(avg_loss))
        print("Average Regression Loss: {:.2f}".format(avg_reg_loss))
        print("Average Classification Loss: {:.2f}".format(avg_class_loss))
        print("Average Disparity Loss: {:.2f}".format(avg_disp_loss))
        print("Average mAP: {:.2f}".format(mean_ap))
        print("Average AP per class: {}".format(epoch_ap))

        torch.save(ssd.state_dict(),
                   os.path.join(checkpoint_folder,
                                "{}_epoch{}.pth".format(model_name, epoch)))
        torch.save(optimizer.state_dict(),
                   os.path.join(checkpoint_folder,
                                "optimizer_epoch{}.pth".format(epoch)))

        if sys.stdout != sys.__stdout__:
            sys.stdout.close()
            sys.stdout = sys.__stdout__