Example #1
def testing(testing_flag, batch_size):
    autoencoder, encoder, decoder = chapman_autoencoder.get_trained_autoencoder(
        testing_flag)
    model = deepcopy(encoder)

    # get chapman datasets
    user_datasets, patient_to_rhythm_dict, test_train_split_dict, working_directory = get_datasets_from_paths(
        testing_flag)

    # get the 4 unique rhythms and map each to an integer label,
    # e.g. {'AFIB': 0, 'SB': 1, 'SR': 2, 'GSVT': 3} (set order is not deterministic)
    unique_rhythms_words = set(patient_to_rhythm_dict.values())
    rhythm_to_label_encoding = {
        rhythm: index
        for index, rhythm in enumerate(unique_rhythms_words)
    }

    train_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'train')
    test_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'test')

    train_loader = DataLoader(train_chapman_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    val_loader = DataLoader(
        test_chapman_dataset,
        batch_size=batch_size,
    )

    byol_model = deepcopy(model)
    byol = byol_chapman_utilities.BYOL(byol_model, image_size=(2500, 4))
    byol_trainer = pl.Trainer(
        max_epochs=10,
        accumulate_grad_batches=2048 // batch_size,
        weights_summary=None,
    )
    byol_trainer.fit(byol, train_loader, val_loader)

    # sanity check: compare the encoder held by the BYOL module against a fresh
    # copy of the original encoder loaded with the trained weights
    byol_encoder = byol.encoder
    state_dict = byol_model.state_dict()
    new_model = deepcopy(encoder)
    new_model.load_state_dict(state_dict)

    for data_label in val_loader:
        data, label = data_label
        byol_encoded_data = byol_encoder(data.float())
        byol_new_model_data = new_model(data.float())
        print(f'byol encoder output shape: {byol_encoded_data.size()}')
        print(f'state dict model output shape: {byol_new_model_data.size()}')
        print(label)
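
# Hedged follow-up sketch: fit a simple downstream classifier on the BYOL
# encoder embeddings produced above. Assumptions not taken from the snippet:
# train_chapman_dataset, byol_encoder and batch_size are still in scope, labels
# are integer class tensors, the embeddings can be flattened to (batch, features),
# and scikit-learn is available; LogisticRegression is only an illustrative choice.
import numpy as np
import torch
from sklearn.linear_model import LogisticRegression

features, targets = [], []
with torch.no_grad():
    for data, label in DataLoader(train_chapman_dataset, batch_size=batch_size):
        embedding = byol_encoder(data.float())
        features.append(embedding.view(embedding.size(0), -1).numpy())
        targets.append(label.numpy())

classifier = LogisticRegression(max_iter=1000)
classifier.fit(np.concatenate(features), np.concatenate(targets))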
Example #2
def multiple_segment_main(testing_flag, batch_size, epoch_number, latent_dim,
                          projection_dim):
    autoencoder, encoder, decoder = chapman_autoencoder.get_trained_autoencoder_ms(
        testing_flag, latent_dim)
    # we will use the encoder as input into byol
    model = deepcopy(encoder)

    # get chapman datasets
    user_datasets, patient_to_rhythm_dict, test_train_split_dict, working_directory, path_to_embeddings = get_datasets_from_paths(
        testing_flag)

    # get the 4 unique rhythms and map each to an integer label,
    # e.g. {'AFIB': 0, 'SB': 1, 'SR': 2, 'GSVT': 3} (set order is not deterministic)
    unique_rhythms_words = set(patient_to_rhythm_dict.values())
    rhythm_to_label_encoding = {
        rhythm: index
        for index, rhythm in enumerate(unique_rhythms_words)
    }

    # get train and test datasets and create dataloaders
    train_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'train')
    test_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'test')

    train_loader = DataLoader(train_chapman_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    val_loader = DataLoader(
        test_chapman_dataset,
        batch_size=batch_size,
    )

    # byol training model
    byol_model = deepcopy(model)
    byol = byol_chapman_utilities.BYOL_MS(byol_model,
                                          image_size=(1250, 4),
                                          projection_size=projection_dim)
    byol_trainer = pl.Trainer(max_epochs=epoch_number,
                              accumulate_grad_batches=2048 // batch_size,
                              weights_summary=None,
                              logger=False)
    byol_trainer.fit(byol, train_loader, val_loader)

    state_dict = byol_model.state_dict()
    byol_encoder = deepcopy(encoder)
    byol_encoder.load_state_dict(state_dict)

    return byol_encoder, test_chapman_dataset, train_chapman_dataset, working_directory, path_to_embeddings
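
# Hedged usage sketch for multiple_segment_main. The argument values are
# illustrative, and the per-window split into two 1250-sample segments along
# dim=2 simply mirrors the multi-segment handling in get_trained_autoencoder_ms
# below; none of this is output from a real run.
import torch
from torch.utils.data import DataLoader

byol_encoder, test_ds, train_ds, working_directory, path_to_embeddings = multiple_segment_main(
    testing_flag=True, batch_size=128, epoch_number=10, latent_dim=512,
    projection_dim=128)

segment_embeddings, segment_labels = [], []
with torch.no_grad():
    for data, label in DataLoader(test_ds, batch_size=128):
        first_half, second_half = torch.split(data, 1250, dim=2)
        segment_embeddings.append(byol_encoder(first_half.float()))
        segment_labels.append(label)
test_embeddings = torch.cat(segment_embeddings)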
Example #3
def resnet_main(testing_flag, batch_size):
    # get chapman datasets
    user_datasets, patient_to_rhythm_dict, test_train_split_dict, working_directory = get_datasets_from_paths(
        testing_flag)

    # get the 4 unique rhythms and map each to an integer label,
    # e.g. {'AFIB': 0, 'SB': 1, 'SR': 2, 'GSVT': 3} (set order is not deterministic)
    unique_rhythms_words = set(patient_to_rhythm_dict.values())
    rhythm_to_label_encoding = {
        rhythm: index
        for index, rhythm in enumerate(unique_rhythms_words)
    }

    # get train and test datasets and create dataloaders
    train_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'train')
    test_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'test')

    train_loader = DataLoader(train_chapman_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    val_loader = DataLoader(
        test_chapman_dataset,
        batch_size=batch_size,
    )
    # ResNet-18 backbone with conv1 replaced to accept single-channel input
    model = resnet18()
    model.conv1 = nn.Conv2d(1,
                            64,
                            kernel_size=7,
                            stride=2,
                            padding=3,
                            bias=False)
    model_name = 'resnet18'

    # supervised learning before byol
    supervised_model = deepcopy(model)
    supervised = byol_chapman_utilities.SupervisedLightningModule(
        supervised_model)
    supervised_trainer = pl.Trainer(max_epochs=25, weights_summary=None)
    supervised_trainer.fit(supervised, train_loader, val_loader)
    supervised_accuracy = byol_chapman_utilities.accuracy_from_val_loader_and_model(
        val_loader, supervised_model)

    # byol training model
    byol_model = deepcopy(model)
    byol = byol_chapman_utilities.BYOL(byol_model, image_size=(2500, 4))
    byol_trainer = pl.Trainer(
        max_epochs=10,
        accumulate_grad_batches=2048 // batch_size,
        weights_summary=None,
    )
    byol_trainer.fit(byol, train_loader, val_loader)

    # supervised learning again after byol
    state_dict = byol_model.state_dict()
    post_byol_model = deepcopy(model)
    post_byol_model.load_state_dict(state_dict)
    post_byol_supervised = byol_chapman_utilities.SupervisedLightningModule(
        post_byol_model)
    post_byol_trainer = pl.Trainer(
        max_epochs=10,
        accumulate_grad_batches=2048 // batch_size,
        weights_summary=None,
    )
    post_byol_trainer.fit(post_byol_supervised, train_loader, val_loader)
    post_byol_accuracy = byol_chapman_utilities.accuracy_from_val_loader_and_model(
        val_loader, post_byol_model)

    # final results
    print(f'supervised accuracy - {supervised_accuracy}')
    print(f'post byol supervised accuracy - {post_byol_accuracy}')

    save_dict = {
        'supervised_acc': supervised_accuracy,
        'post_byol_acc': post_byol_accuracy
    }

    # save results
    start_time = datetime.datetime.now()
    start_time_str = start_time.strftime("%Y%m%d-%H%M%S")

    save_filename = f'{testing_flag}-{batch_size}-{model_name}-{start_time_str}-byol-chapman.pickle'
    save_path = os.path.join(working_directory, save_filename)

    with open(save_path, 'wb') as f:
        pickle.dump(save_dict, f)
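
# Hedged sketch of reading back the results pickle written by resnet_main.
# The glob pattern mirrors the save_filename format above; working_directory is
# assumed to be available (as returned by get_datasets_from_paths).
import glob
import os
import pickle

for results_path in glob.glob(os.path.join(working_directory, '*-byol-chapman.pickle')):
    with open(results_path, 'rb') as f:
        results = pickle.load(f)
    print(results_path, results['supervised_acc'], results['post_byol_acc'])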
Example #4
def testing(testing_flag):
    dataset_save_path = os.path.join(os.path.dirname(os.getcwd()),
                                     "PickledData", "chapman")
    path_to_patient_to_rhythm_dict = os.path.join(
        dataset_save_path, 'patient_to_rhythm_dict.pickle')

    # paths to user datasets with no nan values
    if testing_flag:
        path_to_user_datasets = os.path.join(
            dataset_save_path, 'reduced_four_lead_user_datasets_no_nan.pickle')
        path_to_test_train_split_dict = os.path.join(
            dataset_save_path, 'reduced_test_train_split_dict_no_nan.pickle')
    else:
        path_to_user_datasets = os.path.join(
            dataset_save_path, 'four_lead_user_datasets_no_nan.pickle')
        path_to_test_train_split_dict = os.path.join(
            dataset_save_path, "test_train_split_dict_no_nan.pickle")

    with open(path_to_user_datasets, 'rb') as f:
        user_datasets = pickle.load(f)

    with open(path_to_test_train_split_dict, 'rb') as f:
        test_train_split_dict = pickle.load(f)

    train_user_list = test_train_split_dict['train']
    test_user_list = test_train_split_dict['test']

    with open(path_to_patient_to_rhythm_dict, 'rb') as f:
        patient_to_rhythm_dict = pickle.load(f)

    train_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'train')
    test_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'test')
    batch_size = 128

    train_loader = DataLoader(train_chapman_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    val_loader = DataLoader(
        test_chapman_dataset,
        batch_size=batch_size,
    )

    model = autoencoder(512)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    num_epochs = 10
    for epoch in range(num_epochs):
        for data_label in train_loader:
            data, _ = data_label
            data = Variable(data)
            output = model(data.float())
            # reconstruction loss against the flattened (batch, 2500 * 4) input
            flattened_input = data.view(data.size(0), -1)
            loss = criterion(output, flattened_input.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print('epoch [{}/{}], loss:{:.4f}'.format(epoch + 1, num_epochs,
                                                      loss.item()))

    encoder = model.encoder
    decoder = model.decoder

    for data_label in val_loader:
        data, _ = data_label
        input_data = Variable(data)
        input_data = input_data.float()
        encoded_data = encoder(input_data)
        print(encoded_data.size())
        decoded_data = decoder(encoded_data.float())
        data = data.view(data.size(0), -1)
        loss = criterion(decoded_data.float(), data.float())
        print(f'val_loss: {loss.item()}')
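
# The autoencoder class used above is defined elsewhere in the project; this is
# a minimal sketch of the interface the loops rely on: a latent_dim constructor
# argument (with x_dim/y_dim as in get_trained_autoencoder_ms), .encoder and
# .decoder sub-modules, and a forward pass whose output matches the flattened
# x_dim * y_dim input. The fully connected layers are an assumption, not the
# project's actual architecture.
import torch.nn as nn

class autoencoder(nn.Module):
    def __init__(self, latent_dim, x_dim=2500, y_dim=4):
        super().__init__()
        flat_dim = x_dim * y_dim
        # Flatten accepts both (batch, x_dim, y_dim) windows and pre-flattened input
        self.encoder = nn.Sequential(nn.Flatten(),
                                     nn.Linear(flat_dim, latent_dim),
                                     nn.ReLU())
        self.decoder = nn.Linear(latent_dim, flat_dim)

    def forward(self, x):
        return self.decoder(self.encoder(x))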
Example #5
def get_trained_autoencoder_ms(testing_flag, latent_dim=512):
    dataset_save_path = os.path.join(os.path.dirname(os.getcwd()),
                                     "PickledData", "chapman")
    path_to_patient_to_rhythm_dict = os.path.join(
        dataset_save_path, 'patient_to_rhythm_dict.pickle')

    # paths to user datasets with no nan values
    if testing_flag:
        path_to_user_datasets = os.path.join(
            dataset_save_path, 'reduced_four_lead_user_datasets_no_nan.pickle')
        path_to_test_train_split_dict = os.path.join(
            dataset_save_path, 'reduced_test_train_split_dict_no_nan.pickle')
    else:
        path_to_user_datasets = os.path.join(
            dataset_save_path, 'four_lead_user_datasets_no_nan.pickle')
        path_to_test_train_split_dict = os.path.join(
            dataset_save_path, "test_train_split_dict_no_nan.pickle")

    with open(path_to_user_datasets, 'rb') as f:
        user_datasets = pickle.load(f)

    with open(path_to_test_train_split_dict, 'rb') as f:
        test_train_split_dict = pickle.load(f)

    train_user_list = test_train_split_dict['train']
    test_user_list = test_train_split_dict['test']

    with open(path_to_patient_to_rhythm_dict, 'rb') as f:
        patient_to_rhythm_dict = pickle.load(f)

    train_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'train')
    test_chapman_dataset = byol_chapman_utilities.ChapmanDataset(
        user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'test')

    batch_size = 128

    train_loader = DataLoader(train_chapman_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    val_loader = DataLoader(
        test_chapman_dataset,
        batch_size=len(test_chapman_dataset),
    )
    # train autoencoder
    model = autoencoder(latent_dim, x_dim=1250, y_dim=4)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    num_epochs = 10
    for epoch in range(num_epochs):
        for data_label in train_loader:
            data, _ = data_label
            data1, data2 = torch.split(data, 1250, dim=2)
            for d in [data1, data2]:
                input_data = Variable(d)
                loss_data = d.view(d.size(0), -1)
                output = model(input_data.float())
                loss = criterion(output, loss_data.float())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                if epoch % 10 == 0:
                    print('epoch [{}/{}], loss:{:.4f}'.format(
                        epoch + 1, num_epochs, loss.item()))

    # get validation
    encoder = model.encoder
    decoder = model.decoder

    for data_label in val_loader:
        data, _ = data_label
        data1, data2 = torch.split(data, 1250, dim=2)
        for d in [data1, data2]:
            input_data = Variable(d)
            loss_data = d.view(d.size(0), -1)
            encoded_data = encoder(input_data.float())
            decoded_data = decoder(encoded_data.float())
            loss = criterion(decoded_data.float(), loss_data.float())
            print(f'val_loss: {loss.item()}')

    # return the trained autoencoder instance together with its encoder/decoder
    return model, encoder, decoder
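
# Hedged smoke test for get_trained_autoencoder_ms. The (batch, 1250, 4)
# segment layout and the expectation that the encoder yields (batch, latent_dim)
# are assumptions for illustration, not verified against the real ChapmanDataset
# tensor shapes.
import torch

model, encoder, decoder = get_trained_autoencoder_ms(testing_flag=True, latent_dim=512)
dummy_segment = torch.randn(8, 1250, 4)
with torch.no_grad():
    latent = encoder(dummy_segment)
    reconstruction = decoder(latent)
print(latent.size(), reconstruction.size())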