def create_model(args, num_classes, embedding_vector):
    """Instantiate the network selected by ``args.arch``.

    Args:
        args: parsed CLI namespace; fields read here are ``nonlin``,
            ``tp_rule``, ``ds``, ``arch``, ``gpus`` and ``cuda``.
        num_classes: number of output classes.
        embedding_vector: pretrained embedding weights handed to the model.

    Returns:
        The constructed model, wrapped in ``nn.DataParallel`` when more than
        one GPU is configured, and moved to the GPU when ``args.cuda`` is set.

    Raises:
        NotImplementedError: for an unrecognised non-linearity, dataset,
            or architecture.
    """
    nl_str = args.nonlin.lower()

    # Map CLI names to layer factories; the thunks defer construction so the
    # partial(...) variants only capture args.tp_rule when actually selected.
    nonlin_factories = {
        'relu': lambda: nn.ReLU,
        'threshrelu': lambda: ThresholdReLU,
        'sign11': lambda: partial(Sign11, targetprop_rule=args.tp_rule),
        'qrelu': lambda: partial(qReLU, targetprop_rule=args.tp_rule, nsteps=3),
    }
    if nl_str not in nonlin_factories:
        raise NotImplementedError(
            'no other non-linearities currently supported')
    nonlin = nonlin_factories[nl_str]()

    # Per-dataset (input_shape, target_shape); only semeval has a target shape.
    dataset_shapes = {
        'sentiment140': ((1, 60, 50), None),
        'tsad': ((1, 60, 50), None),
        'semeval': ((1, 60, 100), (1, 6, 100)),
    }
    if args.ds not in dataset_shapes:
        raise NotImplementedError('no other datasets currently supported')
    input_shape, target_shape = dataset_shapes[args.ds]

    # Architecture dispatch.  Note LSTM takes no non-linearity argument and
    # BiLSTM is the only constructor that needs the target shape.
    arch_builders = {
        'cnn': lambda: CNN(input_shape, num_classes, embedding_vector,
                           nonlin=nonlin),
        'lstm': lambda: LSTM(input_shape, num_classes, embedding_vector),
        'cnn-lstm': lambda: CNN_LSTM(input_shape, num_classes,
                                     embedding_vector, nonlin=nonlin),
        'lstm-cnn': lambda: LSTM_CNN(input_shape, num_classes,
                                     embedding_vector, nonlin=nonlin),
        'textcnn': lambda: TextCNN(input_shape, num_classes,
                                   embedding_vector, nonlin=nonlin),
        'bilstm': lambda: BiLSTM(input_shape, target_shape, num_classes,
                                 embedding_vector, nonlin=nonlin),
    }
    if args.arch not in arch_builders:
        raise NotImplementedError('other models not yet supported')
    model = arch_builders[args.arch]()

    logging.info("{} model has {} parameters and non-linearity={} ({})".format(
        args.arch,
        sum(p.data.nelement() for p in model.parameters()),
        nl_str, args.tp_rule.name))

    if len(args.gpus) > 1:
        model = nn.DataParallel(model)
    if args.cuda:
        model.cuda()
    return model
# Training-script fragment: builds the train dataloader, an encoder/classifier
# pair and their optimizers, then runs the epoch loop.
# NOTE(review): this chunk is truncated mid-statement at the end -- the
# `predict(...)` call continues beyond the visible source, so the indentation
# of that final assignment (per-epoch vs. after all epochs) is assumed,
# not shown; confirm against the full file.
batch_size = 40
dataloader = data.DataLoader(database.train_set,
                             batch_size=batch_size,
                             shuffle=True,
                             drop_last=True)  # drop_last keeps batch shapes fixed

# `model` here is a string selector -- presumably set by CLI/config upstream;
# verify where it is assigned.
if model == "cnn":
    encoder = CNN(300, 200, 3)
else:
    encoder = GRU(300, 200)
classifier = FFNN(200, 50, 8)

learning_rate = 1e-3
n_epochs = 10
# Weight decay (L2 regularization) is applied only to the encoder, not the
# classifier -- TODO confirm this asymmetry is intentional.
optimizer_encoder = torch.optim.Adam(encoder.parameters(),
                                     lr=learning_rate,
                                     weight_decay=1e-4)
optimizer_classifier = torch.optim.Adam(classifier.parameters(),
                                        lr=learning_rate)

# Per-split error curves collected across epochs.
train_errors = []
dev_errors = []
test_errors = []

for i in range(n_epochs):
    for batch in tqdm(dataloader):
        train_epoch(batch, encoder, classifier, optimizer_encoder,
                    optimizer_classifier)
    # Evaluate after each epoch (call truncated in the visible source).
    train_pred, train_ans = predict(encoder, classifier,
def main():
    """Entry point: train an MNIST classifier with adversarial-guided
    training (FGSM-based), then evaluate it on the test set.

    Reads all hyperparameters from the CLI via ``options()``; optionally
    loads/saves checkpoints depending on ``args.load_model`` /
    ``args.save_model``.
    """
    args = options()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    def _mnist_transform():
        # Fresh Compose per split; normalizes pixels to roughly [-1, 1].
        return transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, ), (0.5, )),
        ])

    train_set = preload.datasets.MNISTDataset('../data',
                                              train=True,
                                              download=True,
                                              transform=_mnist_transform())
    train_loader = preload.dataloader.DataLoader(train_set,
                                                 batch_size=args.batch_size)
    test_loader = preload.dataloader.DataLoader(
        preload.datasets.MNISTDataset('../data',
                                      train=False,
                                      transform=_mnist_transform()),
        batch_size=args.test_batch_size)

    if args.model == 'cnn':
        model = CNN().to(device)
    elif args.model == 'cnn_leaky_relu':
        # BUG FIX: the original called `CNNLeakyReLU.to(device)` on the
        # *class* -- it was never instantiated.  Construct it first.
        model = CNNLeakyReLU().to(device)
    else:
        # Fail loudly with the offending value instead of print()+exit().
        raise ValueError('model error: unknown model {!r}'.format(args.model))

    # Snapshot of the initial weights so every training regime can start
    # from the same point.
    start_point = copy.deepcopy(model.state_dict())

    # NOTE(review): large commented-out blocks for the other training regimes
    # (plain SGD, L2 regularization, adversarial-gradient regularization,
    # FGSM/BIM/PGD adversarial training) were removed as dead code; recover
    # them from version control if they are needed again.

    print("\nAdversarial guided training (FGSM):")
    if args.load_model:
        model.load_state_dict(torch.load("mnist_cnn_adv_guided.pt"))
    else:
        model.load_state_dict(start_point)
    optimizer = optim.SGD(model.parameters(), lr=args.lr)
    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    # Small fixed subset of the training data used to guide the
    # adversarial objective.
    guide_sets = make_guide_set(train_set, size=1000)
    adv_guided_method = AdversarialGuidedTrain(
        model,
        device,
        train_loader,
        optimizer,
        guide_sets=guide_sets,
        epsilon=args.eps,
        beta=args.beta,
        weight_decay=args.weight_decay,
        gradient_decay=args.gradient_decay)
    model_training(args, model, adv_guided_method, device, test_loader,
                   scheduler)
    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn_adv_guided.pt")
    evaluation(args, model, device, test_loader)
# NOTE(review): this chunk begins mid-call -- the f-string below is the tail
# of a print(...) whose opening parenthesis lies before the visible source.
    f'Max Val. Acc occurred for epoch {max_epoch} with Acc: {max_val * 100:.2f}%'
)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc * 100:.2f}%')

# for CNN model: build, train, plot curves, then reload the best checkpoint.
if model_type == 'CNN':
    hidden_size = 128
    pool_size = 2
    n_filters = 128
    # Two parallel convolution branches -- presumably kernel heights 3 and 8
    # over the token dimension; confirm against the CNN implementation.
    filter_sizes = [3, 8]
    n_epochs = 30
    cnn = CNN(vocab_size, embedding_size, n_filters, filter_sizes, pool_size,
              hidden_size, n_classes, dropout_rate)
    optimizer = optim.Adam(cnn.parameters(), lr=lr)
    train_loss, train_acc, val_loss, val_acc = train_val(
        n_epochs, cnn, train_iter, val_iter, optimizer, lfunc, model_type,
        'two_layer', optim_type)
    # Loss / accuracy curves written to disk for the report.
    export_graph('cnn24_loss.png', n_epochs, train_loss, val_loss,
                 'Loss Across Epochs (CNN)', 'Loss')
    export_graph('cnn24_acc.png', n_epochs, train_acc, val_acc,
                 'Network Accuracy Across Epochs (CNN)', 'Accuracy')
    # test model: reload the state saved by train_val before evaluating.
    cnn.load_state_dict(
        torch.load(
            os.path.join(
                path,
                'CNN_' + 'two_layer' + '_' + optim_type + '_saved_state.pt')))
if __name__ == '__main__':
    # Weights & Biases logging is active only outside debug mode; the import
    # stays local so debug runs never need the package installed.
    if not args.debug_mode:
        import wandb
        wandb.init(project=args.project,
                   name=args.name,
                   tags=args.tags,
                   config=args)

    # Both branches of the original built identical datasets, so the
    # construction is hoisted out of the debug check.
    # NOTE(review): train and test use the same track -- presumably an
    # intentional single-song setup; confirm before generalizing.
    train_data = dataset.MDB_Dataset('MusicDelta_80sRock')
    test_data = dataset.MDB_Dataset('MusicDelta_80sRock')

    print_args(args)

    # get_model: only the CNN architecture is supported right now.
    if args.model_arc != 'CNN':
        raise AssertionError
    model = CNN(hidden_channel_num=10, output_number=4).to(args.device)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    if not args.debug_mode:
        wandb.watch(model)

    trainer = Trainer(model, optimizer, args.device, args.debug_mode,
                      args.test_per_epoch, args.num_epochs, args.weight_path,
                      train_data, test_data)
    trainer.train()