with open(args.normal, 'rb') as f: data = pickle.load(f) elif args.adv: print("======================= Loading Adversarial Dataset") with open(args.adv, 'rb') as f: data = pickle.load(f) else: raise Exception('Must provide normal or adversarial dataset') # Ground truth labels are the same no matter what with open(args.labels, 'rb') as f: labels = pickle.load(f) # Use their class to load the dataset test_set = VisitSequenceWithLabelDataset(data, labels, args.num_features, reverse=True) test_loader = DataLoader(dataset=test_set, batch_size=1, shuffle=False, collate_fn=visit_collate_fn, num_workers=0) # Load the model # Check to see if it was trained onf a GPU or not, act accordingly try: model = torch.load(args.model) except RuntimeError: model = torch.load(args.model, map_location=device) model = model.cpu()
def main(argv):
    """Train an RNN classifier on pickled visit-sequence data.

    Parses ``argv``, loads the train/valid/test splits from
    ``args.data_path``, trains with a class-weighted cross-entropy loss,
    tracks the best model by validation loss (checkpointing it together
    with its test-set metrics and an optional loss plot), and finally
    prints a summary of the best epoch.
    """
    global args
    args = parser.parse_args(argv)
    if args.threads == -1:
        # Leave one core free; fall back to a single worker on 1-core boxes.
        args.threads = torch.multiprocessing.cpu_count() - 1 or 1

    print('===> Configuration')
    print(args)

    cuda = args.cuda
    if cuda:
        if torch.cuda.is_available():
            print('===> {} GPUs are available'.format(
                torch.cuda.device_count()))
        else:
            raise Exception("No GPU found, please run with --no-cuda")

    # Fix the random seed for reproducibility
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)

    def _load_pickle(name):
        # Small helper: unpickle one split file relative to args.data_path.
        with open(args.data_path + name, 'rb') as handle:
            return pickle.load(handle)

    # Data loading
    print('===> Loading entire datasets')
    train_seqs = _load_pickle('train.seqs')
    train_labels = _load_pickle('train.labels')
    valid_seqs = _load_pickle('valid.seqs')
    valid_labels = _load_pickle('valid.labels')
    test_seqs = _load_pickle('test.seqs')
    test_labels = _load_pickle('test.labels')

    # Vocabulary size = largest code id observed anywhere in the data + 1.
    max_code = max(code
                   for patient in train_seqs + valid_seqs + test_seqs
                   for visit in patient
                   for code in visit)
    num_features = max_code + 1

    print(" ===> Construct train set")
    train_set = VisitSequenceWithLabelDataset(
        train_seqs, train_labels, num_features, reverse=False)
    print(" ===> Construct validation set")
    valid_set = VisitSequenceWithLabelDataset(
        valid_seqs, valid_labels, num_features, reverse=False)
    print(" ===> Construct test set")
    test_set = VisitSequenceWithLabelDataset(
        test_seqs, test_labels, num_features, reverse=False)

    train_loader = DataLoader(dataset=train_set, batch_size=args.batch_size,
                              shuffle=True, collate_fn=visit_collate_fn,
                              num_workers=args.threads)
    valid_loader = DataLoader(dataset=valid_set,
                              batch_size=args.eval_batch_size, shuffle=False,
                              collate_fn=visit_collate_fn,
                              num_workers=args.threads)
    test_loader = DataLoader(dataset=test_set,
                             batch_size=args.eval_batch_size, shuffle=False,
                             collate_fn=visit_collate_fn,
                             num_workers=args.threads)
    print('===> Dataset loaded!')

    # Create model
    print('===> Building a Model')
    model = RNN(dim_input=num_features, dim_emb=128, dim_hidden=128)
    if cuda:
        model = model.cuda()
    print(model)
    print('===> Model built!')

    # Class weights counter imbalance: the mean of 0/1 labels is the
    # positive-class frequency, so each class is weighted by the frequency
    # of the *other* class.
    weight_class0 = torch.mean(torch.FloatTensor(train_set.labels))
    weight_class1 = 1.0 - weight_class0
    weight = torch.FloatTensor([weight_class0, weight_class1])
    criterion = nn.CrossEntropyLoss(weight=weight)
    if args.cuda:
        criterion = criterion.cuda()

    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                momentum=args.momentum, nesterov=False,
                                weight_decay=args.weight_decay)
    scheduler = ReduceLROnPlateau(optimizer, 'min')

    best_valid_epoch = 0
    best_valid_loss = sys.float_info.max
    train_losses = []
    valid_losses = []

    if not os.path.exists(args.save):
        os.makedirs(args.save)

    for epoch in trange(args.epochs, desc="Epochs"):
        # Train
        _, _, train_loss = rnn_epoch(train_loader, model,
                                     criterion=criterion,
                                     optimizer=optimizer, train=True)
        train_losses.append(train_loss)

        # Eval
        _, _, valid_loss = rnn_epoch(valid_loader, model,
                                     criterion=criterion)
        valid_losses.append(valid_loss)
        scheduler.step(valid_loss)

        is_best = valid_loss < best_valid_loss
        if is_best:
            best_valid_epoch = epoch
            best_valid_loss = valid_loss

            # evaluate on the test set
            test_y_true, test_y_pred, test_loss = rnn_epoch(
                test_loader, model, criterion=criterion)
            if args.cuda:
                test_y_true = test_y_true.cpu()
                test_y_pred = test_y_pred.cpu()

            # Column 1 of the predictions is the positive-class score.
            test_auc = roc_auc_score(test_y_true.numpy(),
                                     test_y_pred.numpy()[:, 1],
                                     average="weighted")
            test_aupr = average_precision_score(test_y_true.numpy(),
                                                test_y_pred.numpy()[:, 1],
                                                average="weighted")

            with open(args.save + 'train_result.txt', 'w') as result_file:
                result_file.write(
                    'Best Validation Epoch: {}\n'.format(epoch))
                result_file.write(
                    'Best Validation Loss: {}\n'.format(valid_loss))
                result_file.write('Train Loss: {}\n'.format(train_loss))
                result_file.write('Test Loss: {}\n'.format(test_loss))
                result_file.write('Test AUROC: {}\n'.format(test_auc))
                result_file.write('Test AUPR: {}\n'.format(test_aupr))

            # Checkpoint both the whole module and its parameters.
            torch.save(model, args.save + 'best_model.pth')
            torch.save(model.state_dict(),
                       args.save + 'best_model_params.pth')

            # plot
            if args.plot:
                plt.figure(figsize=(12, 9))
                plt.plot(np.arange(len(train_losses)),
                         np.array(train_losses), label='Training Loss')
                plt.plot(np.arange(len(valid_losses)),
                         np.array(valid_losses), label='Validation Loss')
                plt.xlabel('epoch')
                plt.ylabel('Loss')
                plt.legend(loc="best")
                plt.tight_layout()
                plt.savefig(args.save + 'loss_plot.eps', format='eps')
                plt.close()

    print('Best Validation Epoch: {}\n'.format(best_valid_epoch))
    print('Best Validation Loss: {}\n'.format(best_valid_loss))
    print('Train Loss: {}\n'.format(train_loss))
    print('Test Loss: {}\n'.format(test_loss))
    print('Test AUROC: {}\n'.format(test_auc))
    print('Test AUPR: {}\n'.format(test_aupr))
def main(argv):
    """Evaluate a saved model on a pickled test set.

    Parses ``argv``, loads test sequences/labels, deserializes the trained
    model from ``args.model_path``, runs one evaluation pass with
    ``retain_epoch``, and writes loss / AUROC / AUPR to
    ``<args.save>/test_result.txt`` as well as stdout.
    """
    global args
    args = parser.parse_args(argv)
    if args.threads == -1:
        # Leave one core free; fall back to a single worker on 1-core boxes.
        args.threads = torch.multiprocessing.cpu_count() - 1 or 1
    if args.save == '':
        # Default the output directory to wherever the sequences live.
        args.save = os.path.dirname(args.seqs_path)

    print('===> Configuration')
    print(args)

    cuda = args.cuda
    if cuda:
        if torch.cuda.is_available():
            print('===> {} GPUs are available'.format(
                torch.cuda.device_count()))
        else:
            raise Exception("No GPU found, please run with --no-cuda")

    # Fix the random seed for reproducibility
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)

    # Data loading
    print('===> Loading test dataset')
    with open(args.seqs_path, 'rb') as seqs_file:
        test_seqs = pickle.load(seqs_file)
    with open(args.labels_path, 'rb') as labels_file:
        test_labels = pickle.load(labels_file)

    print(" ===> Num features: {}".format(args.num_features))
    # reverse=True: visit order is reversed for this model family.
    test_set = VisitSequenceWithLabelDataset(test_seqs, test_labels,
                                             args.num_features, reverse=True)
    test_loader = DataLoader(dataset=test_set,
                             batch_size=args.eval_batch_size, shuffle=False,
                             collate_fn=visit_collate_fn,
                             num_workers=args.threads)
    print('===> Dataset loaded!')

    # Load model
    print('===> Loading a Model')
    # NOTE(review): torch.load unpickles arbitrary code — only load
    # checkpoints from trusted sources. Loaded onto CPU first, then moved
    # to GPU if requested.
    model = torch.load(args.model_path, map_location=torch.device('cpu'))
    model = model.cpu()
    if cuda:
        model = model.cuda()
    print(model)
    print('===> Model built!')

    # No loss weight for test
    criterion = nn.CrossEntropyLoss()
    if args.cuda:
        criterion = criterion.cuda()

    if not os.path.exists(args.save):
        os.makedirs(args.save)

    # evaluate on the test set
    test_y_true, test_y_pred, test_loss = retain_epoch(test_loader, model,
                                                       criterion=criterion)
    if args.cuda:
        test_y_true = test_y_true.cpu()
        test_y_pred = test_y_pred.cpu()

    # Column 1 of the predictions is the positive-class score.
    test_auc = roc_auc_score(test_y_true.numpy(),
                             test_y_pred.numpy()[:, 1],
                             average="weighted")
    test_aupr = average_precision_score(test_y_true.numpy(),
                                        test_y_pred.numpy()[:, 1],
                                        average="weighted")

    # Identical lines go to the result file and to stdout.
    summary = [
        'Test Loss: {}\n'.format(test_loss),
        'Test AUROC: {}\n'.format(test_auc),
        'Test AUPR: {}\n'.format(test_aupr),
    ]
    with open(os.path.join(args.save, 'test_result.txt'), 'w') as out:
        out.writelines(summary)

    print("Done!")
    for line in summary:
        print(line)
# Fix the random seed for reproducibility np.random.seed(args.seed) torch.manual_seed(args.seed) if cuda: torch.cuda.manual_seed(args.seed) # Data loading with open(args.data_path + 'test.seqs', 'rb') as f: test_seqs = pickle.load(f) with open(args.data_path + 'test.labels', 'rb') as f: test_labels = pickle.load(f) print(" ===> Construct a clean test set") # NOTE: reverse=True since we use RETAIN clean_test_set = VisitSequenceWithLabelDataset(test_seqs, test_labels, args.num_features, reverse=True) clean_loader = DataLoader(dataset=clean_test_set, batch_size=1, shuffle=False, collate_fn=visit_collate_fn, num_workers=args.threads) print('===> Dataset loaded!') # Create model print('===> Building a Model') source_model = torch.load(args.model_path, map_location=torch.device('cpu')) source_model = source_model.cpu() if args.cuda: source_model = source_model.cuda()