Example no. 1
0
def main():
    """Run the full adversarial-training pipeline.

    Loads the dataset, trains a victim classifier, attacks it to produce
    adversarial examples, retrains on the augmented data, and finally
    re-attacks to measure the benefit of adversarial training.
    """
    print("Loading data")
    # Load dataset splits and the word->id vocabulary.
    data_train, data_valid, data_test, vocab = prepare_data()
    # Design a victim model sized to the vocabulary.
    victim = make_model(len(vocab))

    print("Training")
    # Train the victim model on the clean training split.
    fitted = train_model(victim, data_train, data_valid, vocab)

    print(
        "Generating adversarial samples (this step will take dozens of minutes)"
    )
    # Wrap the victim model so the attacker can query it.
    classifier = OpenAttack.PytorchClassifier(fitted, word2id=vocab)
    # Conduct adversarial attacks and generate adversarial examples.
    adversarial_samples = attack(classifier, data_train)

    print("Adversarially training classifier")
    # Retrain the classifier with the additional adversarial examples.
    hardened = train_model(
        fitted, data_train + adversarial_samples, data_valid, vocab)

    print("Testing enhanced model (this step will take dozens of minutes)")
    # Re-attack the victim model to measure the effect of adversarial training.
    attack(classifier, data_train)
Example no. 2
0
def train_model(model, data_train, data_valid, vocab, num_epoch=10):
    """Train ``model`` for ``num_epoch`` epochs, keeping the best weights.

    After every epoch the model is evaluated on ``data_valid``; the weights
    from the epoch with the highest validation accuracy are restored before
    returning.

    Args:
        model: PyTorch module, trained in place by ``train_epoch``.
        data_train: training dataset consumed by ``train_epoch``.
        data_valid: validation dataset; must support ``.eval(clsf)`` and ``len``.
        vocab: word-to-id mapping passed to the classifier wrapper.
        num_epoch: number of training epochs (default 10).

    Returns:
        ``model`` with the best-validation-epoch weights loaded.
    """
    mx_acc = None
    mx_model = None
    for i in range(num_epoch):
        loss = train_epoch(model, data_train, vocab)
        clsf = OpenAttack.PytorchClassifier(model, word2id=vocab)
        accuracy = len(data_valid.eval(clsf).correct()) / len(data_valid)
        print("Epoch %d: loss: %lf, accuracy %lf" % (i, loss, accuracy))
        if mx_acc is None or mx_acc < accuracy:
            # BUG FIX: the original never updated mx_acc, so this branch ran
            # every epoch and the *last* epoch's weights were kept, not the best.
            mx_acc = accuracy
            # BUG FIX: state_dict() tensors are live references that later
            # epochs mutate; clone them so the snapshot is preserved.
            mx_model = {k: v.clone() for k, v in model.state_dict().items()}
    # Guard num_epoch == 0: nothing was snapshotted, leave the model untouched.
    if mx_model is not None:
        model.load_state_dict(mx_model)
    return model
Example no. 3
0
def main():
    """Run the adversarial-training pipeline using a merged HF dataset.

    Same flow as the basic example, but the clean and adversarial samples
    are merged into a fresh ``datasets.Dataset`` before retraining.
    """
    print("Loading data")
    # Load dataset splits and the word->id vocabulary.
    train, valid, test, vocab = prepare_data()
    # Design a victim model sized to the vocabulary.
    model = make_model(len(vocab))

    print("Training")
    # Train the victim model on the clean training split.
    trained_model = train_model(model, train, valid, vocab)

    print(
        "Generating adversarial samples (this step will take dozens of minutes)"
    )
    # Wrap the victim model so the attacker can query it.
    clsf = OpenAttack.PytorchClassifier(trained_model, word2id=vocab)
    # Conduct adversarial attacks and generate adversarial examples.
    adversarial_samples = attack(clsf, train)

    print("Adversarially training classifier")
    print(train.features)
    print(adversarial_samples.features)

    # Merge clean and adversarial examples column-wise; one pass per dataset.
    new_dataset = {"x": [], "y": [], "tokens": []}
    for source in (train, adversarial_samples):
        for record in source:
            for column in new_dataset:
                new_dataset[column].append(record[column])

    # Retrain the classifier with the additional adversarial examples.
    finetune_model = train_model(
        trained_model, datasets.Dataset.from_dict(new_dataset), valid, vocab)

    print("Testing enhanced model (this step will take dozens of minutes)")
    # Re-attack the victim model to measure the effect of adversarial training.
    attack(clsf, train)