def test_saved_model_variable_rnn():
    PATH_TRAIN_SEQS = os.path.join(
        project_root, "data/mortality/processed/mortality.seqs.train")
    PATH_VALID_SEQS = os.path.join(
        project_root, "data/mortality/processed/mortality.seqs.validation")
    PATH_VALID_LABELS = os.path.join(
        project_root, "data/mortality/processed/mortality.labels.validation")

    train_seqs = pickle.load(open(PATH_TRAIN_SEQS, 'rb'))
    valid_seqs = pickle.load(open(PATH_VALID_SEQS, 'rb'))
    valid_labels = pickle.load(open(PATH_VALID_LABELS, 'rb'))

    num_features = calculate_num_features(train_seqs)
    dataset = VisitSequenceWithLabelDataset(valid_seqs, valid_labels, num_features)
    loader = torch.utils.data.DataLoader(dataset=dataset,
                                         batch_size=3,
                                         shuffle=False,
                                         collate_fn=visit_collate_fn,
                                         num_workers=0)
    # Load the saved model onto CPU regardless of where it was trained.
    model = torch.load(os.path.join(project_root,
                                    "output/mortality/MyVariableRNN.pth"),
                       map_location=lambda storage, loc: storage)

    any_exception = model_eval(model, loader)
    assert not any_exception, "your saved model should match your model definition"
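# A minimal sketch of what the `model_eval` helper used above might do: run
# one forward pass per batch and report whether anything raised. The real
# helper is defined elsewhere in this repo, and the model's forward signature
# is an assumption here (that it accepts the (seqs, lengths) tuple produced
# by visit_collate_fn), so treat this as an illustration, not the actual code.
def model_eval_sketch(model, loader):
    model.eval()
    with torch.no_grad():
        for inputs, labels in loader:  # inputs is the (seqs, lengths) tuple
            try:
                model(inputs)  # assumed forward signature
            except Exception:
                return True  # truthy: the saved weights don't fit the class
    return False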
def test_visit_collate_fn():
    PATH_TRAIN_SEQS = os.path.join(
        project_root, "data/mortality/processed/mortality.seqs.train")
    PATH_VALID_SEQS = os.path.join(
        project_root, "data/mortality/processed/mortality.seqs.validation")
    PATH_VALID_LABELS = os.path.join(
        project_root, "data/mortality/processed/mortality.labels.validation")

    train_seqs = pickle.load(open(PATH_TRAIN_SEQS, 'rb'))
    valid_seqs = pickle.load(open(PATH_VALID_SEQS, 'rb'))
    valid_labels = pickle.load(open(PATH_VALID_LABELS, 'rb'))

    num_features = calculate_num_features(train_seqs)
    dataset = VisitSequenceWithLabelDataset(valid_seqs, valid_labels, num_features)

    sample_batch = list(zip(dataset.seqs, dataset.labels))[:3]
    (seqs_tensor, lengths_tensor), labels_tensor = visit_collate_fn(sample_batch)

    expect(isinstance(seqs_tensor, torch.Tensor), "seqs should be a Tensor")
    expect(isinstance(lengths_tensor, torch.Tensor), "lengths should be a Tensor")
    expect(isinstance(labels_tensor, torch.Tensor), "labels should be a Tensor")
    expect(seqs_tensor.dtype == torch.float32, "seqs should be a FloatTensor")
    expect(lengths_tensor.dtype == torch.int64, "lengths should be a LongTensor")
    expect(labels_tensor.dtype == torch.int64, "labels should be a LongTensor")
    assert_expectations()
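# A minimal sketch of a collate function that satisfies the dtype checks
# above, assuming each sequence is a (num_visits, num_features) numpy array.
# Sorting the batch by length, descending, is a common convention so the
# padded batch can later be fed to pack_padded_sequence; this sketch is an
# assumption about the approach, not the graded implementation.
def visit_collate_fn_sketch(batch):
    batch = sorted(batch, key=lambda pair: pair[0].shape[0], reverse=True)
    lengths = [seq.shape[0] for seq, _ in batch]
    max_len = lengths[0]
    num_features = batch[0][0].shape[1]

    # Zero-pad every sequence up to the longest one in the batch.
    seqs = np.zeros((len(batch), max_len, num_features), dtype=np.float32)
    for i, (seq, _) in enumerate(batch):
        seqs[i, :seq.shape[0], :] = seq

    labels = [label for _, label in batch]
    return ((torch.from_numpy(seqs),
             torch.tensor(lengths, dtype=torch.long)),
            torch.tensor(labels, dtype=torch.long))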
def test_visit_sequence_with_label_dataset():
    PATH_VALID_SEQS = os.path.join(
        project_root, "data/mortality/processed/mortality.seqs.validation")
    PATH_VALID_LABELS = os.path.join(
        project_root, "data/mortality/processed/mortality.labels.validation")

    valid_seqs = pickle.load(open(PATH_VALID_SEQS, 'rb'))
    valid_labels = pickle.load(open(PATH_VALID_LABELS, 'rb'))

    num_features = calculate_num_features(valid_seqs)
    dataset = VisitSequenceWithLabelDataset(valid_seqs, valid_labels, num_features)

    expect(isinstance(dataset.seqs, list), "dataset.seqs should be a list")
    expect(isinstance(dataset.labels, list), "dataset.labels should be a list")
    expect(len(valid_seqs) == len(dataset.seqs),
           "length of the dataset does not match the input")

    sample = dataset.seqs[0]
    # scipy sparse matrices expose .toarray(); checking for the attribute
    # avoids an AttributeError when the element is neither type.
    expect(isinstance(sample, np.ndarray) or hasattr(sample, "toarray"),
           "each seqs element should be a numpy array or scipy.sparse matrix")
    expect(sample.shape[0] == len(valid_seqs[0]),
           "number of rows for the first patient does not match its number of visits")
    expect(sample.shape[1] == num_features,
           "number of columns does not match the number of features")
    assert_expectations()
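# A minimal sketch of the Dataset shape the test above exercises, assuming
# each patient arrives as a list of visits, each visit a list of 0-based
# feature indices that get multi-hot encoded into one row per visit. This is
# an illustration of the expected shapes, not the graded implementation.
class VisitSequenceWithLabelDatasetSketch(torch.utils.data.Dataset):
    def __init__(self, seqs, labels, num_features):
        self.labels = list(labels)
        self.seqs = []
        for patient in seqs:
            # One (num_visits, num_features) matrix per patient.
            matrix = np.zeros((len(patient), num_features), dtype=np.float32)
            for row, visit in enumerate(patient):
                matrix[row, list(visit)] = 1.0  # multi-hot encode one visit
            self.seqs.append(matrix)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        return self.seqs[index], self.labels[index]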
def test_calculate_num_features_validation():
    # Uses the validation data for test purposes only;
    # you should use the train set in your own code.
    PATH_VALID_SEQS = os.path.join(
        project_root, "data/mortality/processed/mortality.seqs.validation")

    valid_seqs = pickle.load(open(PATH_VALID_SEQS, 'rb'))

    num_features = calculate_num_features(valid_seqs)
    assert num_features == 902
def test_calculate_num_features():
    PATH_TRAIN_SEQS = os.path.join(
        project_root, "data/mortality/processed/mortality.seqs.train")

    train_seqs = pickle.load(open(PATH_TRAIN_SEQS, 'rb'))

    num_features = calculate_num_features(train_seqs)
    expect(isinstance(num_features, int), "num_features should be an int")
    expect(num_features == 911, "num_features for the train set should be 911")
    assert_expectations()
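# A minimal sketch of calculate_num_features consistent with both tests
# above, assuming seqs is a list of patients, each a list of visits, each a
# list of 0-based feature indices; the feature count is then the largest
# index observed plus one. The real helper lives elsewhere in this repo.
def calculate_num_features_sketch(seqs):
    return max(code
               for patient in seqs
               for visit in patient
               for code in visit) + 1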
"cuda" if torch.cuda.is_available() and USE_CUDA else "cpu") torch.manual_seed(1) if device.type == "cuda": torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False # Data loading print('===> Loading entire datasets') train_seqs = pickle.load(open(PATH_TRAIN_SEQS, 'rb')) train_labels = pickle.load(open(PATH_TRAIN_LABELS, 'rb')) valid_seqs = pickle.load(open(PATH_VALID_SEQS, 'rb')) valid_labels = pickle.load(open(PATH_VALID_LABELS, 'rb')) test_seqs = pickle.load(open(PATH_TEST_SEQS, 'rb')) test_labels = pickle.load(open(PATH_TEST_LABELS, 'rb')) num_features = calculate_num_features(train_seqs) train_dataset = VisitSequenceWithLabelDataset(train_seqs, train_labels, num_features) valid_dataset = VisitSequenceWithLabelDataset(valid_seqs, valid_labels, num_features) test_dataset = VisitSequenceWithLabelDataset(test_seqs, test_labels, num_features) train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=visit_collate_fn, num_workers=NUM_WORKERS) valid_loader = DataLoader(dataset=valid_dataset, batch_size=BATCH_SIZE,
print(PATH_TRAIN_SEQS)
print(PATH_TRAIN_LABELS)

NUM_EPOCHS = 5
BATCH_SIZE = 1
USE_CUDA = False  # Set 'True' if you want to use the GPU
NUM_WORKERS = 0

# Data loading
print('===> Loading entire datasets')
seqs = pickle.load(open(PATH_TRAIN_SEQS, 'rb'))
labels = pickle.load(open(PATH_TRAIN_LABELS, 'rb'))
#print(seqs)
print(np.shape(seqs))
#print(labels)

num_features = calculate_num_features(seqs)
print(num_features)

seq = torch.from_numpy(np.array(seqs).astype('float32'))
# 'int64' yields a LongTensor; the 'long' dtype alias is deprecated in numpy.
lbl = torch.from_numpy(np.array(labels).astype('int64'))
#dataset = TensorDataset(seq.unsqueeze(-1), lbl)
dataset = TensorDataset(seq, lbl)

torch.manual_seed(0)
#dataset = VisitSequenceWithLabelDataset(seqs, labels, num_features)

# Split dataset
train_size = int(0.6 * len(dataset))
validation_size = int(0.2 * len(dataset))
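# A hedged sketch of how the 60/20/20 split started above is commonly
# finished with torch.utils.data.random_split; the rest of the original
# script is truncated here, so this continuation is an assumption.
test_size = len(dataset) - train_size - validation_size  # remainder -> test
train_set, validation_set, test_set = torch.utils.data.random_split(
    dataset, [train_size, validation_size, test_size])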