Example #1
def feature_importance_remove(X_train, Y_train, X_test, Y_test):
    from train_utils import build_model, get_dataset, fit_model
    import numpy as np  # needed for np.delete / np.arange below
    models = []
    n_features = X_train.shape[-1]
    feature_nb_train = []
    feature_nb_test = []

    # Drop one feature at a time, retrain, and record accuracy without it.
    for i in range(n_features):
        print(i)
        ix = np.delete(np.arange(n_features), i)
        model = build_model(input_shape=(n_features - 1, ), num_layers=3)
        batch_size = 30000
        adult_train = get_dataset(X_train[:, ix],
                                  Y_train,
                                  batch_size=batch_size)
        adult_test = get_dataset(X_test[:, ix], Y_test)
        EPOCHS = 1000
        history = fit_model(model,
                            X_train[:, ix],
                            Y_train,
                            EPOCHS,
                            batch_size=batch_size,
                            verbose=0)
        #     file_name="../temp_store/models/adult-{}.h5".format(i)
        #     model.load_weights(file_name)
        models.append(model)
        loss, acc = model.evaluate(adult_train)
        feature_nb_train.append(acc)
        loss, acc = model.evaluate(adult_test)
        feature_nb_test.append(acc)
    return models, feature_nb_train, feature_nb_test
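A minimal usage sketch for feature_importance_remove, assuming X_train, Y_train, X_test and Y_test are NumPy arrays; full_test_acc (accuracy of a model trained on all features) and feature_names are hypothetical names introduced only for this illustration:

models, acc_train, acc_test = feature_importance_remove(X_train, Y_train,
                                                        X_test, Y_test)
# Rank features by how much test accuracy drops when each one is removed.
# full_test_acc and feature_names are assumptions, not part of the example above.
drops = [full_test_acc - acc for acc in acc_test]
for name, drop in sorted(zip(feature_names, drops), key=lambda p: p[1], reverse=True):
    print("{}: accuracy drop {:.4f}".format(name, drop))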
Example #2
def main():
    unique_chars_amount = config['vocab_size']
    embedding_dim = config['embedding']
    rnn_units = config['units']
    n_layers = config['layers']

    # Building & Loading our model
    print("Starting to build the model.")
    model = build_model(unique_chars_amount, embedding_dim, rnn_units, 1,
                        n_layers)
    model.load_weights(tf.train.latest_checkpoint(args.path))
    model.build(tf.TensorShape([1, None]))
    print("Model is ready.")

    # If we're looping, ask for prime and generate text
    while args.loop:
        prime = input("\nEnter prime (Or empty enter to exit): ").replace(
            "\\n", "\n")
        if not prime:
            break
        print("Starting to generate text.\n")
        main_generate_text(model, prime)
    # while/else: this branch runs only when the loop exits without a break,
    # i.e. when args.loop is falsy and the body above never executed.
    else:
        print("Generating text.\n")
        # Generating
        main_generate_text(model, args.prime)
Example #3
def main(dirname, dirname2):
    x_train, y_train, x_val, y_val = load_data(dirname)
    x_test, y_test = load_data_testing(dirname2)
    # Optionally print the shapes of the train / validation / test splits here.
    model = build_model(y_train.shape[1])
    print('Training stage')
    print('==============')
    model.fit(x_train,
              y_train,
              epochs=200,
              batch_size=16,
              validation_data=(x_val, y_val))
    score, acc = model.evaluate(x_test, y_test, batch_size=16, verbose=0)
    print('Test performance: accuracy={0}, loss={1}'.format(acc, score))
    model.save('model.h5')
    model.summary()
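Several of these examples call build_model(num_classes) without showing its definition. A minimal Keras sketch consistent with how it is used here (a compiled classifier whose output width is y_train.shape[1] and whose evaluate returns loss and accuracy) could look like the following; the layer widths and input dimension are assumptions, not the original implementation.

def build_model_sketch(num_classes, input_dim=128):
    # Hypothetical stand-in for build_model; layer sizes and input_dim are assumptions.
    from tensorflow import keras
    model = keras.Sequential([
        keras.layers.Dense(64, activation='relu', input_shape=(input_dim,)),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(num_classes, activation='softmax'),
    ])
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model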
Example #4
def eval(config: dict, model_path='checkpoints/model_epoch30.pth'):
    ssd = build_model(config, is_test=True)
    ssd.load_state_dict(torch.load(model_path))
    ssd.train(False)

    net = Predictor(ssd)

    data_transform = transforms.Compose([
        transforms.ToRelativeBoxes(),
        transforms.Resize(config['image_size']),
        transforms.Scale(),
        transforms.ToTensor()
    ])

    # `priors` is assumed to come from generate_priors(config), as in train_ssd (Example #14).
    priors = generate_priors(config)
    target_transform = MatchPrior(priors, config)

    val_set = CityscapesDataset(config, 'dataset/val', None, data_transform,
                                target_transform, True)

    test_image = val_set.get_image(0)

    boxes, labels, conf, _, _ = net.predict(test_image)

    drawer = Draw(test_image)

    for i in range(boxes.shape[0]):
        top_left = tuple(boxes[i][:2].numpy().flatten())
        bottom_right = tuple(boxes[i][2:].numpy().flatten())
        drawer.rectangle((top_left, bottom_right))

    test_image.save("predict.jpg")
Example #5
def eval_disparity(config: dict, model_path='checkpoints/model_epoch30.pth'):
    ssd = build_model(config, is_test=True)
    ssd.load_state_dict(torch.load(model_path))
    ssd.train(False)

    net = Predictor(ssd)

    data_transform = transforms.Compose([
        transforms.ToRelativeBoxes(),
        transforms.Resize(config['image_size']),
        transforms.Scale(),
        transforms.ToTensor()
    ])

    # As in the other eval functions, the prior boxes are assumed to come from generate_priors(config).
    priors = generate_priors(config)
    target_transform = MatchPrior(priors, config)

    val_set = CityscapesDataset(config, 'dataset/val', None, data_transform,
                                target_transform, True)

    errors = []

    for i, _ in enumerate(val_set):
        image = val_set.get_image(i)
        gt_disparity = val_set.get_disparity(i)

        _, _, _, _, disparity = net.predict(image)

        error = ((gt_disparity - disparity)**2).flatten()

        errors.append(error)

    errors = torch.cat(errors)
    print("RMSE: {}".format(math.sqrt(errors.mean().item())))
Example #6
def eval(config: dict, model_path='checkpoints/model_epoch40.pth'):
    ssd = build_model(config, is_test=True)
    ssd.load_state_dict(torch.load(model_path))
    ssd.train(False)

    net = Predictor(ssd)

    data_transform = transforms.Compose([
        transforms.ToRelativeBoxes(),
        transforms.Resize(config['image_size']),
        transforms.Scale(),
        transforms.ToTensor()
    ])

    # As above, the prior boxes are assumed to come from generate_priors(config).
    priors = generate_priors(config)
    target_transform = MatchPrior(priors, config)

    val_set = CityscapesDataset(config, 'dataset/val', None, data_transform,
                                target_transform, True)

    for i, _ in enumerate(val_set):
        if i % 10 == 0:
            print("Image {}".format(i))

        image = val_set.get_image(i)

        probs, boxes, disparity = net.predict(image)
        labels = torch.argmax(probs, dim=probs.dim() - 1)

        chosen_indices = []

        for class_index in range(1, config['num_classes'] + 1):
            class_mask = labels == class_index

            # If there's no prediction in this class, skip the class
            if class_mask.long().sum() <= 0:
                continue

            class_probabilities = probs[class_mask, class_index]
            class_boxes = boxes[class_mask]

            class_indices = nms(class_boxes, class_probabilities, 0.5)
            chosen_indices.append(class_indices)

        # Skip this image if no class produced any detections.
        if not chosen_indices:
            continue
        chosen_indices = torch.cat(chosen_indices)

        probs = probs[chosen_indices]
        boxes = boxes[chosen_indices]

        box_drawer = Draw(image)

        for box in boxes:
            top_left = tuple(box[:2].numpy().flatten())
            bottom_right = tuple(box[2:].numpy().flatten())
            box_drawer.rectangle((top_left, bottom_right))

        image.save('result.jpg')

        # TODO change to all image evaluation
        break
Example #7
def main(dirname):
    x_train, y_train, x_test, y_test = load_data(dirname)
    print(y_train.shape)
    model = build_model(y_train.shape[1])
    print(model.summary())
    print('Training stage')
    print('==============')
    history = model.fit(x_train,
                        y_train,
                        epochs=250,
                        batch_size=16,
                        validation_data=(x_test, y_test))
    score, acc = model.evaluate(x_test, y_test, batch_size=16, verbose=0)
    print('Test performance: accuracy={0}, loss={1}'.format(acc, score))
    model.save('model.h5')
Example #8
def get_original_models(seed, num_layers=3):
    model_orig_list = []
    for i, f in enumerate(dataset_fs):
        Xtr, Xts, ytr, yts, Ztr, Zts = f(0, remove_z=False)
        X_test, X_train, Y_test, Y_train = prep_data(Xtr, Xts, ytr, yts)
        optimizer = tf.keras.optimizers.Adam(lr=0.01)
        n_features = X_train.shape[-1]
        batch_size = 30000
        model_full = build_model(input_shape=(n_features, ),
                                 num_layers=num_layers,
                                 optimizer=optimizer,
                                 seed=seed)
        EPOCHS = 1000
        history = fit_model(model_full, X_train, Y_train, EPOCHS,
                            batch_size=batch_size, verbose=0)
        model_orig_list.append(model_full)
    return model_orig_list
Example #9
def main(dirname):
    x_train, y_train, x_test, y_test = load_data(dirname)
    num_val_samples = (x_train.shape[0]) // 5
    model = build_model(y_train.shape[1])
    print('Training stage')
    print('==============')
    history = model.fit(x_train,
                        y_train,
                        epochs=100,
                        batch_size=32,
                        validation_data=(x_test, y_test))
    score, acc = model.evaluate(x_test, y_test, batch_size=16, verbose=0)
    print('Test performance: accuracy={0}, loss={1}'.format(acc, score))
    model.save('HPRmodelv4.h5')
Example #10
def main():

    # Get Command Line Arguments
    args = ParseCommandLine()
    #Print data directory
    print("Data directory: ", args.data_directory)
    #Print device used
    use_gpu = torch.cuda.is_available() and args.gpu
    if use_gpu:
        print("Training on GPU.")
    else:
        print("Training on CPU.")

    #Print out architecture and hyperparameters
    print("Architecture: {}".format(args.arch))
    print("Learning rate: {}".format(args.learning_rate))
    print("Hidden units: {}".format(args.hidden_units))
    print("Epochs: {}".format(args.epochs))
    # Print the save_dir option
    if args.save_dir:
        print("Checkpoint save directory: {}".format(args.save_dir))
    #--------------------------------------------------------------------
    # Get data loaders
    train_loader, valid_loader, test_loader, class_to_idx = train_utils.load_data(
        args.data_directory)
    #--------------------------------------------------------------------
    # Build the model
    model = train_utils.build_model(args.arch, args.hidden_units)
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(),
                           lr=args.learning_rate)
    model.class_to_idx = class_to_idx
    #--------------------------------------------------------------------
    #Train the model
    train_utils.train_model(model, args.epochs, args.learning_rate, use_gpu,
                            criterion, optimizer, train_loader, valid_loader)
    #--------------------------------------------------------------------
    #Validation on the test set
    test_loss, accuracy = train_utils.validate_model(model, criterion,
                                                     test_loader)
    print("Validation on the test set")
    print(f"Test accuracy: {accuracy:.2f}%")

    #--------------------------------------------------------------------
    # Save the checkpoint
    if args.save_dir:
        save_checkpoint(args.arch, args.learning_rate, args.hidden_units,
                        args.epochs, model, optimizer, args.save_dir)
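The call above refers to a save_checkpoint helper that is not shown. A hedged sketch of what it might do, keeping only the call signature used above (the stored fields and file name are assumptions):

import os
import torch

def save_checkpoint(arch, learning_rate, hidden_units, epochs, model, optimizer, save_dir):
    # Hypothetical implementation; the real project may persist different fields.
    checkpoint = {
        'arch': arch,
        'learning_rate': learning_rate,
        'hidden_units': hidden_units,
        'epochs': epochs,
        'class_to_idx': model.class_to_idx,
        'state_dict': model.state_dict(),
        'optimizer_state': optimizer.state_dict(),
    }
    torch.save(checkpoint, os.path.join(save_dir, 'checkpoint.pth'))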
Example #11
def feature_importance_nulify(X_train,
                              Y_train,
                              X_test,
                              Y_test,
                              feature_idx=None,
                              seed=49,
                              num_layers=3):
    from train_utils import build_model, get_dataset, fit_model
    from datasets import nulify_feature
    models = []
    n_features = X_train.shape[-1]
    if feature_idx is None:
        feature_idx = range(n_features)
    feature_nb_train = []
    feature_nb_test = []

    # Zero out one feature at a time, retrain, and record accuracy with it nullified.
    for i in feature_idx:
        print(i)
        #     ix = np.delete(np.arange(n_features), i)
        model = build_model(input_shape=(n_features, ),
                            num_layers=num_layers,
                            seed=seed)
        batch_size = 10000
        x, y = nulify_feature(X_train, Y_train, i)
        adult_train = get_dataset(x, y, batch_size=batch_size)

        EPOCHS = 1000
        history = fit_model(model,
                            x,
                            y,
                            EPOCHS=EPOCHS,
                            batch_size=batch_size,
                            verbose=0)
        x, y = nulify_feature(X_test, Y_test, i)
        adult_test = get_dataset(x, y)
        #     file_name="../temp_store/models/adult-{}.h5".format(i)
        #     model.load_weights(file_name)
        models.append(model)
        loss, acc = model.evaluate(adult_train)
        feature_nb_train.append(acc)
        loss, acc = model.evaluate(adult_test)
        feature_nb_test.append(acc)
    return models, feature_nb_train, feature_nb_test
Example #12
def evaluate_pertrubed_models(X_train,
                              Y_train,
                              X_test,
                              Y_test,
                              e_alpha=0.25,
                              feature_set=None,
                              attack=no_attack,
                              train_robust=False,
                              model_orig=None,
                              EPOCHS=1000,
                              R_EPOCHS=100,
                              batch_size=30000,
                              seed=49,
                              num_layers=3):
    n_features = X_train.shape[-1]

    adult_train = get_dataset(X_train, Y_train, batch_size=batch_size)
    adult_test = get_dataset(X_test, Y_test)
    optimizer = tf.keras.optimizers.Adam(lr=0.01)
    model_full = build_model(input_shape=(n_features, ),
                             num_layers=num_layers,
                             optimizer=optimizer,
                             seed=seed)

    if train_robust:
        # TODO
        optimizer = tf.keras.optimizers.Adam(lr=0.01)
        for t in range(R_EPOCHS):
            adv_err, adv_loss = epoch_adversarial(adult_train,
                                                  model_full,
                                                  attack,
                                                  epsilon=0.25,
                                                  alpha=0.08,
                                                  num_iter=30,
                                                  optimizer=optimizer)
    else:
        if model_orig is None:
            print("WARNING")
            print("TRAINING MODEL FROM SCRATCH")
            history = fit_model(model_full,
                                X_train,
                                Y_train,
                                EPOCHS,
                                batch_size=batch_size,
                                verbose=0)
        else:
            model_full.set_weights(model_orig.get_weights())
    models_p = []
    adv_lis = []
    val_names = ("train_err", "test_err", "adv_err", "adv_err_fgsm", "e_loss",
                 "e_loss_train")
    print(*("{}".format(i) for i in val_names), sep="\t")
    if feature_set is None:
        feature_set = range(n_features)
    for i in feature_set:
        print(i)
        z_idx = i
        model_explain = clone_model(model_full)
        optimizer = tf.keras.optimizers.Adam(lr=0.01)
        for t in range(50):
            train_err, train_loss = epoch_explanation(
                adult_train,
                model_explain,
                attack,
                sensitive_feature_id=z_idx,
                e_alpha=e_alpha,
                epsilon=0.25,
                alpha=0.08,
                num_iter=30,
                optimizer=optimizer)
        models_p.append(model_explain)
        adv_err, adv_err_f, e_loss, e_loss_train, test_err = epoch_eval(
            adult_train, adult_test, model_explain, z_idx)
        r = (train_err, test_err, adv_err, adv_err_f, e_loss, e_loss_train)
        adv_lis.append(r)
        print(*("{:.6f}".format(i) for i in r), sep="\t")

    return models_p, adv_lis
Example #13
def load_model_lists(model_lists_names,
                     file_dir,
                     num_layers,
                     seed,
                     alpha=15.0):
    '''
    Load previously saved model weights for each dataset / sensitive-feature / model-list combination.

    :param model_lists_names: names of the model variants to load
    :param file_dir: root directory containing the saved weights
    :param num_layers: number of layers (used to locate the "model_{num_layers}" subdirectory)
    :param seed: random seed (used to locate the "seed_{seed}" subdirectory)
    :param alpha: alpha value encoded in the weight file names
    :return: model_list[model][dataset]
    '''
    from train_utils import build_model
    import tensorflow as tf
    file_dir = os.path.join(file_dir, "model_{}".format(num_layers),
                            "seed_{}".format(seed))
    model_dict = defaultdict(list)
    #     for i,m_name in enumerate(model_lists_names):
    #         model_dict[m_name] = []
    f_nb_list = f_sensitive_list

    for i, f in enumerate(dataset_fs):
        optimizer = tf.keras.optimizers.Adam(lr=0.01)
        n_features = n_features_list[i]
        for j in range(len(f_nb_list[i])):
            sensitive_feature_id = f_nb_list[i][j]
            dataset_name = dataset_names[i]
            f_name = feature_name_dict[dataset_name][sensitive_feature_id]
            for k, model_list_name in enumerate(model_lists_names):
                file_name = "-".join(
                    map(lambda x: "_".join(x.split(" ")), [
                        dataset_name, model_list_name, f_name,
                        "alpha_{}".format(round(alpha, 0))
                    ]))

                # distinguish b/t a list of model which vary by feature and
                # the original model (does not vary by feature)
                if model_list_name != MODEL_ORIGINAL:
                    file_name = file_name + "-feature_" + str(j)
                    file_path = os.path.join(file_dir, file_name)
                    model = build_model(input_shape=(n_features, ),
                                        num_layers=num_layers,
                                        optimizer=optimizer)
                    model.load_weights(file_path)
                    l = model_dict[model_list_name]
                    if not l:
                        l.extend([[] for _ in range(len(dataset_fs))])
                    model_dict[model_list_name][i].append(model)
                #                 print(file_path)
                else:
                    file_path = os.path.join(file_dir, file_name)
                    if j == 0:
                        model = build_model(input_shape=(n_features, ),
                                            num_layers=num_layers,
                                            optimizer=optimizer)
                        model.load_weights(file_path)
                        model_dict[model_list_name].append(model)

    model_lists = [model_dict[m_name] for m_name in model_lists_names]
    return model_lists
Example #14
def train_ssd(start_epoch: int, end_epoch: int, config: dict, use_gpu: bool = True, model_name='model',
              checkpoint_folder='checkpoints',
              log_folder='log', redirect_output=True):
    if not os.path.isdir(log_folder):
        os.makedirs(log_folder)

    priors = generate_priors(config)

    target_transform = MatchPrior(priors, config)
    train_transform = transforms.Compose([
        transforms.CustomJitter(),
        transforms.ToOpenCV(),
        transforms.RandomMirror()
    ])
    data_transform = transforms.Compose([
        transforms.ToRelativeBoxes(),
        transforms.Resize(config['image_size']),
        transforms.Scale(),
        transforms.ToTensor()
    ])
    train_set = CityscapesDataset(config, 'dataset/train', train_transform,
                                  data_transform, target_transform)

    train_loader = DataLoader(train_set, batch_size=32,
                              shuffle=True, num_workers=4)

    ssd = build_model(config)
    if use_gpu:
        ssd = ssd.cuda()
    ssd.train(True)
    if os.path.isfile(os.path.join(checkpoint_folder, "{}_epoch{}.pth".format(model_name, start_epoch - 1))):
        ssd.load_state_dict(
            torch.load(os.path.join(checkpoint_folder, "{}_epoch{}.pth".format(model_name, start_epoch - 1))))

    criterion = MultiBoxLoss(0.5, 0, 3, config)
    disparity_criterion = BerHuLoss()

    ssd_params = [
        {'params': ssd.extractor.parameters()},
        {'params': ssd.extras.parameters()},
        {'params': itertools.chain(ssd.class_headers.parameters(),
                                   ssd.location_headers.parameters(),
                                   ssd.upsampling.parameters())}
    ]

    optimizer = SGD(ssd_params, lr=0.001, momentum=0.9, weight_decay=0.0005)
    lr_scheduler = CosineAnnealingLR(optimizer, 120, last_epoch=-1)
    if os.path.isfile(os.path.join(checkpoint_folder, "optimizer_epoch{}.pth".format(start_epoch - 1))):
        print("Loading previous optimizer")
        optimizer.load_state_dict(
            torch.load(os.path.join(checkpoint_folder, "optimizer_epoch{}.pth".format(start_epoch - 1))))

    for epoch in range(start_epoch, end_epoch):
        # Note: recent PyTorch versions expect scheduler.step() to be called
        # after optimizer.step(), i.e. at the end of the epoch.
        lr_scheduler.step()
        running_loss = 0.0
        running_regression_loss = 0.0
        running_classification_loss = 0.0
        running_disparity_loss = 0.0
        num_steps = len(train_loader)
        aps = torch.zeros((config['num_classes'],))
        running_map = 0

        if redirect_output:
            sys.stdout = open(os.path.join(log_folder, 'train_epoch_{}.txt'.format(epoch)), 'w')

        for i, batch in enumerate(train_loader):
            images, gt_locations, labels, gt_disparity = batch

            if use_gpu:
                images = images.cuda()
                gt_locations = gt_locations.cuda()
                labels = labels.cuda()
                gt_disparity = gt_disparity.cuda()

            optimizer.zero_grad()

            confidences, locations, disparity = ssd(images)

            regression_loss, classification_loss = criterion.forward(confidences, locations, labels, gt_locations)
            disparity_loss = disparity_criterion.forward(disparity, gt_disparity)
            loss = regression_loss + classification_loss + 10 * disparity_loss
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            running_regression_loss += regression_loss.item()
            running_classification_loss += classification_loss.item()
            running_disparity_loss += disparity_loss.item()

            with torch.no_grad():
                boxes = convert_locations_to_boxes(locations, priors.cuda(), config['variance'][0],
                                                   config['variance'][1])
                gt_boxes = convert_locations_to_boxes(gt_locations, priors.cuda(), config['variance'][0],
                                                      config['variance'][1])
                batch_map, batch_ap = calculate_map(confidences, labels, boxes, gt_boxes)
                running_map += batch_map
                aps += batch_ap

        avg_loss = running_loss / num_steps
        avg_reg_loss = running_regression_loss / num_steps
        avg_class_loss = running_classification_loss / num_steps
        avg_disp_loss = running_disparity_loss / num_steps
        mean_ap = running_map / num_steps
        epoch_ap = aps / num_steps

        print("Epoch {}".format(epoch))
        print("Average Loss: {:.2f}".format(avg_loss))
        print("Average Regression Loss: {:.2f}".format(avg_reg_loss))
        print("Average Classification Loss: {:.2f}".format(avg_class_loss))
        print("Average Disparity Loss: {:.2f}".format(avg_disp_loss))
        print("Average mAP: {:.2f}".format(mean_ap))
        print("Average AP per class: {}".format(epoch_ap))

        torch.save(ssd.state_dict(), os.path.join(checkpoint_folder, "{}_epoch{}.pth".format(model_name, epoch)))
        torch.save(optimizer.state_dict(), os.path.join(checkpoint_folder, "optimizer_epoch{}.pth".format(epoch)))

    if sys.stdout != sys.__stdout__:
        sys.stdout.close()
        sys.stdout = sys.__stdout__
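A hedged sketch of driving the SSD training and evaluation functions from Examples #4 and #14. Only the config keys actually read in the code above (image_size, num_classes, variance) appear; the values are placeholders, and a real config would carry whatever additional keys build_model, generate_priors and CityscapesDataset expect.

# Placeholder configuration: keys taken from the code above, values assumed.
config = {
    'image_size': 300,
    'num_classes': 10,
    'variance': [0.1, 0.2],
}

if __name__ == '__main__':
    # Train for 30 epochs from scratch, then evaluate the last checkpoint saved.
    train_ssd(start_epoch=0, end_epoch=30, config=config, use_gpu=True)
    eval(config, model_path='checkpoints/model_epoch29.pth')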