Example #1
def test_set_params(augmented_mock_clf, augmentation, kwargs, random_batch):
    """Asserts that changing the parameters of a classifier instantiated with
    the `AugmentedDataLoader` is possible. Ensures that
    `braindecode.augmentation` is consistent with `Skorch` API.
    """
    augmented_mock_clf.set_params(
        iterator_train__transforms=augmentation(**kwargs)
    )
    augmented_mock_clf.set_params(
        train_split=predefined_split(random_batch)
    )
    assert isinstance(
        augmented_mock_clf.train_split,
        type(predefined_split(random_batch))
    )
Example #2
    def test_pickle(self, predefined_split, data):
        from skorch.dataset import Dataset

        valid_dataset = Dataset(*data)
        train_split = predefined_split(valid_dataset)

        # does not raise
        pickle.dumps(train_split)
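For context, a minimal sketch of what skorch's `predefined_split` does: it
returns a train-split callable that ignores the incoming training data and
always yields the fixed validation dataset (the real helper lives in
`skorch.helper`; this sketch only mirrors its semantics).

def my_predefined_split(valid_dataset):
    # Sketch only: same behavior as `skorch.helper.predefined_split`.
    def split(dataset, y=None):
        # keep the full incoming data for training, always use the
        # fixed dataset for validation
        return dataset, valid_dataset
    return split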
Example #3
def train(data_folder: str, out_model: str):
    out_model = Path(out_model)
    out_model.mkdir()

    data_paths = list(Path(data_folder).rglob("*.npy"))
    train_paths, valid_paths = train_test_split(data_paths, train_size=0.7)

    train_dataset = LibriSpeechDataset(
        train_paths,
        Path(data_folder).parent / "SPEAKERS.TXT",
        Compose([ExtractStft(),
                 RandomCrop(constants.STFT_CROP_WIDTH)]))

    valid_dataset = LibriSpeechDataset(
        valid_paths,
        Path(data_folder).parent / "SPEAKERS.TXT",
        Compose([ExtractStft(),
                 RandomCrop(constants.STFT_CROP_WIDTH)]))

    net = NeuralNet(Classifier,
                    module__n_classes=constants.NUMBER_OF_CLASSES,
                    criterion=nn.CrossEntropyLoss,
                    batch_size=8,
                    max_epochs=100,
                    optimizer=optim.Adam,
                    lr=0.001,
                    iterator_train__shuffle=True,
                    iterator_train__num_workers=2,
                    iterator_valid__shuffle=False,
                    iterator_valid__num_workers=2,
                    train_split=predefined_split(valid_dataset),
                    device="cuda",
                    callbacks=[
                        Checkpoint(
                            f_params=(out_model / "params.pt").as_posix(),
                            f_optimizer=(out_model / "optim.pt").as_posix(),
                            f_history=(out_model / "history.pt").as_posix()),
                        ProgressBar(postfix_keys=["train_loss", "train_acc"]),
                        EarlyStopping(),
                        EpochScoring(acc,
                                     name="val_acc",
                                     lower_is_better=False,
                                     on_train=False),
                        EpochScoring(acc,
                                     name="train_acc",
                                     lower_is_better=False,
                                     on_train=True),
                        Tensorboard((out_model / "train").as_posix(),
                                    metrics={"acc": acc_as_metric},
                                    is_training=True),
                        Tensorboard((out_model / "valid").as_posix(),
                                    metrics={"acc": acc_as_metric},
                                    is_training=False),
                    ])

    net.fit(train_dataset)
Example #4
def get_deep_learning_model(model_args, valid_dataset):
    cuda = torch.cuda.is_available()
    device = model_args["device"] if cuda else 'cpu'
    if cuda:
        torch.backends.cudnn.benchmark = True
    seed = model_args["seed"]
    # Set the random seed (e.g. 20200220) to make results reproducible
    if seed:
        set_random_seeds(seed=seed, cuda=cuda)

    if model_args["model_type"] == "ShallowFBCSPNet":
        model = ShallowFBCSPNet(
            model_args["n_chans"],
            model_args["n_classes"] + 1,
            input_window_samples=model_args["input_window_samples"],
            final_conv_length='auto',
        )
    elif model_args["model_type"] == "SleepStager":
        model = SleepStager(
            n_channels=model_args["n_chans"],
            sfreq=model_args["sfreq"],
            n_classes=model_args["n_classes"] + 1,
            input_size_s=model_args["input_window_samples"] /
            model_args["sfreq"],
        )
    else:
        raise ValueError(
            "Unknown model_type: {}".format(model_args["model_type"]))

    if cuda:
        model.cuda()

    clf = EEGClassifier(
        model,
        criterion=model_args["criterion"],
        optimizer=torch.optim.AdamW,
        # using test_sample for validation
        train_split=predefined_split(valid_dataset),
        optimizer__lr=model_args["lr"],
        optimizer__weight_decay=model_args["weight_decay"],
        batch_size=model_args["batch_size"],
        callbacks=[
            "accuracy",
            ("lr_scheduler",
             LRScheduler('CosineAnnealingLR',
                         T_max=model_args["n_epochs"] - 1)),
            ("early_stopping",
             EarlyStopping(monitor='valid_loss',
                           patience=model_args["patience"]))
        ],
        device=device,
        iterator_train__num_workers=20,
        iterator_train__pin_memory=True)

    return clf
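For reference, a hypothetical call to the factory above; every key mirrors a
lookup in the function body, and the values are illustrative placeholders
rather than tuned settings.

# Hypothetical usage sketch (placeholder values, not from the original).
model_args = {
    "device": "cuda",
    "seed": 20200220,
    "model_type": "ShallowFBCSPNet",
    "n_chans": 22,
    "n_classes": 3,
    "input_window_samples": 1000,
    "sfreq": 250,
    "criterion": torch.nn.NLLLoss,
    "lr": 1e-3,
    "weight_decay": 1e-4,
    "batch_size": 64,
    "n_epochs": 50,
    "patience": 10,
}
clf = get_deep_learning_model(model_args, valid_dataset)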
Example #5
def parameterized_vgg11():
    return NeuralNetBinaryClassifier(
        VGG11,
        optimizer=torch.optim.Adamax,
        max_epochs=30,
        lr=0.001,
        batch_size=128,
        iterator_train__shuffle=True,
        # Supply the skorch framework with our own predefined test dataset
        train_split=predefined_split(dataset_test),
        callbacks=callback_list,
        device='cuda')
Example #6
def parameterized_resnet152_96():
    return NeuralNetBinaryClassifier(
        ResNet152_96,
        optimizer=torch.optim.Adam,
        max_epochs=30,
        lr=0.01,
        batch_size=128,
        iterator_train__shuffle=True,
        train_split=predefined_split(dataset_test),
        callbacks=callback_list,
        device='cuda')
Example #7
def parameterized_lenet():
    return NeuralNetBinaryClassifier(
        LeNet,
        optimizer=torch.optim.Adam,
        max_epochs=100,
        lr=0.01,
        batch_size=128,
        iterator_train__shuffle=True,  # Shuffle training data on each epoch
        train_split=predefined_split(dataset_test),
        callbacks=callback_list,
        device='cuda')
Example #8
def main():
    sampling_rate = 360

    wavelet = "mexh"  # mexh, morl, gaus8, gaus4
    scales = pywt.central_frequency(wavelet) * sampling_rate / np.arange(
        1, 101, 1)

    (x1_train, x2_train, y_train,
     groups_train), (x1_test, x2_test, y_test,
                     groups_test) = load_data(wavelet=wavelet,
                                              scales=scales,
                                              sampling_rate=sampling_rate)
    print("Data loaded successfully!")

    log_dir = "./logs/{}".format(wavelet)
    shutil.rmtree(log_dir, ignore_errors=True)

    callbacks = [
        Initializer("[conv|fc]*.weight", fn=torch.nn.init.kaiming_normal_),
        Initializer("[conv|fc]*.bias",
                    fn=partial(torch.nn.init.constant_, val=0.0)),
        LRScheduler(policy=StepLR, step_size=5, gamma=0.1),
        EpochScoring(scoring=make_scorer(f1_score, average="macro"),
                     lower_is_better=False,
                     name="valid_f1"),
        TensorBoard(SummaryWriter(log_dir))
    ]
    net = NeuralNetClassifier(  # skorch wraps PyTorch modules in a scikit-learn compatible API
        MyModule,
        criterion=torch.nn.CrossEntropyLoss,
        optimizer=torch.optim.Adam,
        lr=0.001,
        max_epochs=30,
        batch_size=1024,
        train_split=predefined_split(
            Dataset({
                "x1": x1_test,
                "x2": x2_test
            }, y_test)),
        verbose=1,
        device="cuda",
        callbacks=callbacks,
        iterator_train__shuffle=True,
        optimizer__weight_decay=0,
    )
    net.fit({"x1": x1_train, "x2": x2_train}, y_train)
    y_true, y_pred = y_test, net.predict({"x1": x1_test, "x2": x2_test})

    print(confusion_matrix(y_true, y_pred))
    print(classification_report(y_true, y_pred, digits=4))

    net.save_params(f_params="./models/model_{}.pkl".format(wavelet))
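As a usage note, reloading the parameters saved above in a fresh process
requires initializing the net first; a minimal sketch with the standard
skorch API:

# Minimal reload sketch: skorch requires `initialize()` before
# `load_params` on a net that has not been fitted in this process.
net.initialize()
net.load_params(f_params="./models/model_{}.pkl".format(wavelet))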
Example #9
def main(fin, logdir):
    folders = list(Path(fin).glob("*/*"))
    ftrain, ftest = train_test_split(folders, random_state=SEED)
    train = RawDataset(ftrain, transform=transform(train=True))
    test = RawDataset(ftest, transform=transform(train=False))

    model = build_model(
        max_epochs=50,
        logdir=logdir,
        train_split=predefined_split(test),
    )
    model.fit(train)

    th = np.arange(0.1, 0.9, 0.01)
    mean, std = model.thresholds(test, partial(dice, th=th))
    plot(mean, thresholds=th)
Example #10
File: train.py Project: kqf/hubmap
def main(fin, logdir):
    with open(fin) as f:
        folders = json.load(f)

    train = RawDataset(folders["train"], transform=transform(train=True))
    test = RawDataset(folders["test"], transform=transform(train=False))

    model = build_model(
        max_epochs=50,
        logdir=logdir,
        train_split=predefined_split(test),
    )
    model.fit(train)

    th = np.arange(0.1, 0.9, 0.01)
    mean, std = model.thresholds(test, partial(dice, th=th))
    plot(mean, thresholds=th)
Example #11
    def train_(self,
               train_set,
               valid_set,
               lr=5e-4,
               batch_size=16,
               max_nb_epochs=20,
               early_stopping_patience=5,
               early_stopping_monitor='valid_bal_acc'):
        # Train using a GPU if possible
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # Callbacks
        train_bal_acc = EpochScoring(scoring='balanced_accuracy',
                                     on_train=True,
                                     name='train_bal_acc',
                                     lower_is_better=False)
        valid_bal_acc = EpochScoring(scoring='balanced_accuracy',
                                     on_train=False,
                                     name='valid_bal_acc',
                                     lower_is_better=False)
        early_stopping = EarlyStopping(
            monitor=early_stopping_monitor,
            patience=early_stopping_patience,
            lower_is_better='loss' in early_stopping_monitor)
        callbacks = [
            ('train_bal_acc', train_bal_acc),
            ('valid_bal_acc', valid_bal_acc),
            ('progress_bar', ProgressBar()),
            ('early_stopping', early_stopping),
        ]

        # Skorch model creation
        skorch_net = EEGTransformer(self.to(device),
                                    criterion=torch.nn.CrossEntropyLoss,
                                    optimizer=torch.optim.Adam,
                                    optimizer__lr=lr,
                                    train_split=predefined_split(valid_set),
                                    batch_size=batch_size,
                                    callbacks=callbacks,
                                    device=device)

        # Training: `y` is None since it is already supplied in the dataset.
        skorch_net.fit(train_set, y=None, epochs=max_nb_epochs)

        return skorch_net
Example #12
def main():
    run = wandb.init()
    cfg = wandb.config
    filepath = './data/' + cfg.dataset

    device = get_device(cfg)
    feature_list = get_feature_list()

    X_train, X_valid, X_test, y_train, y_valid, y_test = load_data(
        filepath, cfg, feature_list)
    valid_ds = Dataset(X_valid, y_valid)

    model = mlp.MLPModule(input_units=cfg.n_features,
                          hidden_units=cfg.hidden_units,
                          num_hidden=cfg.layers,
                          dropout=cfg.dropout).to(device)

    class_weights = class_weight.compute_class_weight(
        'balanced', classes=np.unique(y_train), y=y_train)
    net = NeuralNetClassifier(
        model,
        max_epochs=cfg.epochs,
        batch_size=cfg.batch_size,
        criterion=nn.CrossEntropyLoss,
        criterion__weight=torch.FloatTensor(class_weights).to(device),
        optimizer=torch.optim.SGD,
        optimizer__lr=cfg.learning_rate,
        optimizer__weight_decay=cfg.weight_decay,
        device=device,
        train_split=predefined_split(valid_ds),
        callbacks=[],
        iterator_train__shuffle=bool(cfg.shuffle),
        warm_start=False)

    net.initialize()
    net.fit(X_train, y_train)

    y_pred = net.predict(X_test)
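The snippet is truncated here; a plausible follow-up evaluation of `y_pred`
with standard scikit-learn metrics might look like this (an assumption, not
part of the original):

# Hypothetical evaluation step using scikit-learn metrics.
from sklearn.metrics import accuracy_score, classification_report
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))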
Example #13
def test_eeg_classifier():
    # 5, 6, 9, 10, 13, 14 are codes for executed and imagined hands/feet
    subject_id = 1
    event_codes = [5, 6, 9, 10, 13, 14]

    # This will download the files if you don't have them yet,
    # and then return the paths to the files.
    physionet_paths = mne.datasets.eegbci.load_data(subject_id,
                                                    event_codes,
                                                    update_path=False)

    # Load each of the files
    parts = [
        mne.io.read_raw_edf(path,
                            preload=True,
                            stim_channel="auto",
                            verbose="WARNING") for path in physionet_paths
    ]

    # Concatenate them
    raw = concatenate_raws(parts)

    # Find the events in this dataset
    events, _ = mne.events_from_annotations(raw)

    # Use only EEG channels
    eeg_channel_inds = mne.pick_types(raw.info,
                                      meg=False,
                                      eeg=True,
                                      stim=False,
                                      eog=False,
                                      exclude="bads")

    # Extract trials, only using EEG channels
    epoched = mne.Epochs(
        raw,
        events,
        dict(hands=2, feet=3),
        tmin=1,
        tmax=4.1,
        proj=False,
        picks=eeg_channel_inds,
        baseline=None,
        preload=True,
    )

    # Convert data from volt to microvolt
    # PyTorch expects float32 for inputs and int64 for labels.
    X = (epoched.get_data() * 1e6).astype(np.float32)
    y = (epoched.events[:, 2] - 2).astype(np.int64)  # 2,3 -> 0,1

    # Set if you want to use GPU
    # You can also use torch.cuda.is_available() to determine if cuda is available on your machine.
    cuda = False
    set_random_seeds(seed=20170629, cuda=cuda)

    # This will determine how many crops are processed in parallel
    input_window_samples = 450
    n_classes = 2
    in_chans = X.shape[1]
    # final_conv_length determines the size of the receptive field of the ConvNet
    model = ShallowFBCSPNet(
        in_chans=in_chans,
        n_classes=n_classes,
        input_window_samples=input_window_samples,
        final_conv_length=12,
    )
    to_dense_prediction_model(model)

    if cuda:
        model.cuda()

    # determine output size
    test_input = np_to_var(
        np.ones((2, in_chans, input_window_samples, 1), dtype=np.float32))
    if cuda:
        test_input = test_input.cuda()
    out = model(test_input)
    n_preds_per_input = out.cpu().data.numpy().shape[2]

    train_set = create_from_X_y(X[:48],
                                y[:48],
                                drop_last_window=False,
                                window_size_samples=input_window_samples,
                                window_stride_samples=n_preds_per_input)

    valid_set = create_from_X_y(X[48:60],
                                y[48:60],
                                drop_last_window=False,
                                window_size_samples=input_window_samples,
                                window_stride_samples=n_preds_per_input)

    cropped_cb_train = CroppedTrialEpochScoring(
        "accuracy",
        name="train_trial_accuracy",
        lower_is_better=False,
        on_train=True,
    )

    cropped_cb_valid = CroppedTrialEpochScoring(
        "accuracy",
        on_train=False,
        name="valid_trial_accuracy",
        lower_is_better=False,
    )

    clf = EEGClassifier(
        model,
        criterion=CroppedLoss,
        criterion__loss_function=nll_loss,
        optimizer=optim.Adam,
        train_split=predefined_split(valid_set),
        batch_size=32,
        callbacks=[
            ("train_trial_accuracy", cropped_cb_train),
            ("valid_trial_accuracy", cropped_cb_valid),
        ],
    )

    clf.fit(train_set, y=None, epochs=4)

    expected = [
        {
            "batches": [
                {"train_loss": 1.9391239881515503, "train_batch_size": 32},
                {"train_loss": 2.895704507827759, "train_batch_size": 32},
                {"train_loss": 1.0713893175125122, "train_batch_size": 32},
                {"valid_loss": 1.1811838150024414, "valid_batch_size": 24},
            ],
            "epoch": 1,
            "train_batch_count": 3,
            "valid_batch_count": 1,
            "train_loss": 1.9687392711639404,
            "train_loss_best": True,
            "valid_loss": 1.1811838150024414,
            "valid_loss_best": True,
            "train_trial_accuracy": 0.4791666666666667,
            "train_trial_accuracy_best": True,
            "valid_trial_accuracy": 0.5,
            "valid_trial_accuracy_best": True,
        },
        {
            "batches": [
                {"train_loss": 1.5488793849945068, "train_batch_size": 32},
                {"train_loss": 1.1174801588058472, "train_batch_size": 32},
                {"train_loss": 1.1525697708129883, "train_batch_size": 32},
                {"valid_loss": 2.202029228210449, "valid_batch_size": 24},
            ],
            "epoch": 2,
            "train_batch_count": 3,
            "valid_batch_count": 1,
            "train_loss": 1.2729764382044475,
            "train_loss_best": True,
            "valid_loss": 2.202029228210449,
            "valid_loss_best": False,
            "train_trial_accuracy": 0.5,
            "train_trial_accuracy_best": True,
            "valid_trial_accuracy": 0.5,
            "valid_trial_accuracy_best": False,
        },
        {
            "batches": [
                {"train_loss": 1.0049529075622559, "train_batch_size": 32},
                {"train_loss": 1.0266971588134766, "train_batch_size": 32},
                {"train_loss": 1.0799436569213867, "train_batch_size": 32},
                {"valid_loss": 1.0638500452041626, "valid_batch_size": 24},
            ],
            "epoch": 3,
            "train_batch_count": 3,
            "valid_batch_count": 1,
            "train_loss": 1.0371979077657063,
            "train_loss_best": True,
            "valid_loss": 1.0638500452041626,
            "valid_loss_best": True,
            "train_trial_accuracy": 0.5,
            "train_trial_accuracy_best": False,
            "valid_trial_accuracy": 0.5,
            "valid_trial_accuracy_best": False,
        },
        {
            "batches": [
                {"train_loss": 1.0052555799484253, "train_batch_size": 32},
                {"train_loss": 0.8479514718055725, "train_batch_size": 32},
                {"train_loss": 0.9589881300926208, "train_batch_size": 32},
                {"valid_loss": 0.8794112801551819, "valid_batch_size": 24},
            ],
            "epoch": 4,
            "train_batch_count": 3,
            "valid_batch_count": 1,
            "train_loss": 0.9373983939488729,
            "train_loss_best": True,
            "valid_loss": 0.8794112801551819,
            "valid_loss_best": True,
            "train_trial_accuracy": 0.5,
            "train_trial_accuracy_best": False,
            "valid_trial_accuracy": 0.5,
            "valid_trial_accuracy_best": False,
        },
    ]

    history_without_dur = [
        {k: v for k, v in h.items() if k != "dur"} for h in clf.history
    ]
    assert_deep_allclose(expected, history_without_dur, atol=1e-3, rtol=1e-3)
Example #14
def test_eeg_classifier():
    # 5, 6, 9, 10, 13, 14 are codes for executed and imagined hands/feet
    subject_id = 1
    event_codes = [5, 6, 9, 10, 13, 14]

    # This will download the files if you don't have them yet,
    # and then return the paths to the files.
    physionet_paths = mne.datasets.eegbci.load_data(subject_id,
                                                    event_codes,
                                                    update_path=False)

    # Load each of the files
    parts = [
        mne.io.read_raw_edf(path,
                            preload=True,
                            stim_channel="auto",
                            verbose="WARNING") for path in physionet_paths
    ]

    # Concatenate them
    raw = concatenate_raws(parts)

    # Find the events in this dataset
    events, _ = mne.events_from_annotations(raw)

    # Use only EEG channels
    eeg_channel_inds = mne.pick_types(raw.info,
                                      meg=False,
                                      eeg=True,
                                      stim=False,
                                      eog=False,
                                      exclude="bads")

    # Extract trials, only using EEG channels
    epoched = mne.Epochs(
        raw,
        events,
        dict(hands=2, feet=3),
        tmin=1,
        tmax=4.1,
        proj=False,
        picks=eeg_channel_inds,
        baseline=None,
        preload=True,
    )

    # Convert data from volt to microvolt
    # PyTorch expects float32 for inputs and int64 for labels.
    X = (epoched.get_data() * 1e6).astype(np.float32)
    y = (epoched.events[:, 2] - 2).astype(np.int64)  # 2,3 -> 0,1

    # Set if you want to use GPU
    # You can also use torch.cuda.is_available() to determine if cuda is available on your machine.
    cuda = False
    set_random_seeds(seed=20170629, cuda=cuda)

    # This will determine how many crops are processed in parallel
    input_window_samples = 450
    n_classes = 2
    in_chans = X.shape[1]
    # final_conv_length determines the size of the receptive field of the ConvNet
    model = ShallowFBCSPNet(
        in_chans=in_chans,
        n_classes=n_classes,
        input_window_samples=input_window_samples,
        final_conv_length=12,
    )
    to_dense_prediction_model(model)

    if cuda:
        model.cuda()

    # determine output size
    test_input = np_to_var(
        np.ones((2, in_chans, input_window_samples, 1), dtype=np.float32))
    if cuda:
        test_input = test_input.cuda()
    out = model(test_input)
    n_preds_per_input = out.cpu().data.numpy().shape[2]

    train_set = create_from_X_y(X[:48],
                                y[:48],
                                drop_last_window=False,
                                window_size_samples=input_window_samples,
                                window_stride_samples=n_preds_per_input)

    valid_set = create_from_X_y(X[48:60],
                                y[48:60],
                                drop_last_window=False,
                                window_size_samples=input_window_samples,
                                window_stride_samples=n_preds_per_input)

    cropped_cb_train = CroppedTrialEpochScoring(
        "accuracy",
        name="train_trial_accuracy",
        lower_is_better=False,
        on_train=True,
    )

    cropped_cb_valid = CroppedTrialEpochScoring(
        "accuracy",
        on_train=False,
        name="valid_trial_accuracy",
        lower_is_better=False,
    )

    clf = EEGClassifier(
        model,
        cropped=True,
        criterion=CroppedLoss,
        criterion__loss_function=nll_loss,
        optimizer=optim.Adam,
        train_split=predefined_split(valid_set),
        batch_size=32,
        callbacks=[
            ("train_trial_accuracy", cropped_cb_train),
            ("valid_trial_accuracy", cropped_cb_valid),
        ],
    )

    clf.fit(train_set, y=None, epochs=4)

    expected = [{
        'batches': [
            {'train_batch_size': 32, 'train_loss': 1.6639312505722046},
            {'train_batch_size': 32, 'train_loss': 2.6161606311798096},
            {'train_batch_size': 32, 'train_loss': 1.627132773399353},
            {'valid_batch_size': 24, 'valid_loss': 0.9677614569664001},
        ],
        'epoch': 1,
        'train_batch_count': 3,
        'train_loss': 1.9690748850504558,
        'train_loss_best': True,
        'train_trial_accuracy': 0.4791666666666667,
        'train_trial_accuracy_best': True,
        'valid_batch_count': 1,
        'valid_loss': 0.9677614569664001,
        'valid_loss_best': True,
        'valid_trial_accuracy': 0.5,
        'valid_trial_accuracy_best': True
    }, {
        'batches': [
            {'train_batch_size': 32, 'train_loss': 1.3829222917556763},
            {'train_batch_size': 32, 'train_loss': 1.3123714923858643},
            {'train_batch_size': 32, 'train_loss': 1.0109959840774536},
            {'valid_batch_size': 24, 'valid_loss': 1.9435862302780151},
        ],
        'epoch': 2,
        'train_batch_count': 3,
        'train_loss': 1.2354299227396648,
        'train_loss_best': True,
        'train_trial_accuracy': 0.5,
        'train_trial_accuracy_best': True,
        'valid_batch_count': 1,
        'valid_loss': 1.9435862302780151,
        'valid_loss_best': False,
        'valid_trial_accuracy': 0.5,
        'valid_trial_accuracy_best': False
    }, {
        'batches': [
            {'train_batch_size': 32, 'train_loss': 1.172208547592163},
            {'train_batch_size': 32, 'train_loss': 0.8899562954902649},
            {'train_batch_size': 32, 'train_loss': 1.0232216119766235},
            {'valid_batch_size': 24, 'valid_loss': 0.9585554599761963},
        ],
        'epoch': 3,
        'train_batch_count': 3,
        'train_loss': 1.0284621516863506,
        'train_loss_best': True,
        'train_trial_accuracy': 0.5,
        'train_trial_accuracy_best': False,
        'valid_batch_count': 1,
        'valid_loss': 0.9585554599761963,
        'valid_loss_best': True,
        'valid_trial_accuracy': 0.5,
        'valid_trial_accuracy_best': False
    }, {
        'batches': [
            {'train_batch_size': 32, 'train_loss': 0.9693693518638611},
            {'train_batch_size': 32, 'train_loss': 0.900641918182373},
            {'train_batch_size': 32, 'train_loss': 0.8839665651321411},
            {'valid_batch_size': 24, 'valid_loss': 0.873468816280365},
        ],
        'epoch': 4,
        'train_batch_count': 3,
        'train_loss': 0.9179926117261251,
        'train_loss_best': True,
        'train_trial_accuracy': 0.625,
        'train_trial_accuracy_best': True,
        'valid_batch_count': 1,
        'valid_loss': 0.873468816280365,
        'valid_loss_best': True,
        'valid_trial_accuracy': 0.4166666666666667,
        'valid_trial_accuracy_best': False
    }]

    history_without_dur = [
        {k: v for k, v in h.items() if k != "dur"} for h in clf.history
    ]
    assert_deep_allclose(expected, history_without_dur, atol=1e-3, rtol=1e-3)
Example #15
if re.compile('/Users/long/').match(location):
    my_callbacks = [
        "accuracy",
        ("lr_scheduler", LRScheduler('CosineAnnealingLR', T_max=n_epochs - 1)),
        ('on_epoch_begin_callback', on_epoch_begin_callback),
        ('on_batch_end_callback', on_batch_end_callback),
    ]
elif re.compile('/content/drive').match(location):
    my_callbacks = [
        "accuracy",
        ("lr_scheduler", LRScheduler('CosineAnnealingLR', T_max=n_epochs - 1)),
    ]

clf = EEGClassifier(
    net,
    criterion=torch.nn.CrossEntropyLoss,  # alternatively torch.nn.NLLLoss
    optimizer=torch.optim.Adam,  # alternatively torch.optim.AdamW
    # use valid_set for validation; train_split=None would disable validation
    train_split=predefined_split(valid_set),
    optimizer__lr=lr,
    optimizer__weight_decay=weight_decay,
    batch_size=batch_size,
    callbacks=my_callbacks,
    device=device,
)
# Model training for a specified number of epochs. `y` is None as it is already supplied
# in the dataset.
clf.fit(train_set, y=None, epochs=n_epochs)
Example #16
        dataset_train = Alzheimer_Dataset(X_train,
                                          y_train,
                                          transform=data_transforms['train'])
        dataset_val = Alzheimer_Dataset(X_val,
                                        y_val,
                                        transform=data_transforms['train'])

        checkpoint = Checkpoint(f_params=save_model,
                                monitor='valid_acc_best',
                                f_optimizer=save_opt,
                                f_history=hist)
        seed_everything = FixRandomSeed()
        net = NeuralNetClassifier(model,
                                  criterion=nn.CrossEntropyLoss,
                                  optimizer=optim.SGD,
                                  lr=lr,
                                  batch_size=batch_size,
                                  max_epochs=ep,
                                  optimizer__momentum=0.90,
                                  iterator_train__shuffle=True,
                                  iterator_train__num_workers=8,
                                  iterator_valid__shuffle=True,
                                  iterator_valid__num_workers=8,
                                  train_split=predefined_split(dataset_val),
                                  callbacks=[
                                      lrscheduler, checkpoint, seed_everything,
                                      early_stopping
                                  ],
                                  device=device)
        # split once only
        net.fit(dataset_train, y=None)
Example #17
    valid_ds = dataset.Dataset(data['valid_X'], data['valid_y'])

    # ----------- train model ----------
    model = SpatioSpectralCNN3D(in_channel=data['train_X'].shape[2],
                                in_filter=data['train_X'].shape[-1],
                                conv1_filter=20,
                                conv2_filter=80,
                                in_class=2)

    net = NeuralNetClassifier(model,
                              max_epochs=100,
                              lr=0.000625,
                              optimizer=torch.optim.Adam,
                              criterion=torch.nn.CrossEntropyLoss,
                              train_split=predefined_split(valid_ds),
                              callbacks=[
                                  earlystop,
                                  ('train_acc',
                                   EpochScoring('accuracy',
                                                on_train=True,
                                                lower_is_better=False))
                              ],
                              device='cuda',
                              verbose=1)
    history = net.fit(data['train_X_NSCM'], data['train_y'])

    # ----------- test model ----------
    predictions = net.predict(data['test_X'])
    accuracy = np.mean(predictions == data['test_y'])
    accuracies[param['class_pair'], param['participant'],
Example #18
    return sklearn.metrics.f1_score(y_true, y_pred, average='macro')




net = NeuralNetClassifier(
    MyModule,
    max_epochs=100,
    lr=0.001,
    batch_size=1024,
    optimizer=Adam,
    iterator_train__shuffle=True,
    iterator_train__num_workers=4,
    iterator_train__pin_memory=True,
    train_split=predefined_split(val),
    callbacks=[LRScheduler(policy=CosineAnnealingLR, T_max=64),
               EpochScoring(macrof1, use_caching=True, lower_is_better=False),
               EpochScoring(microf1, use_caching=True, lower_is_better=False),
               Checkpoint(monitor='macrof1_best', dirname='model')],
    device='cuda',
    verbose=1
)

print('start training')
_ = net.fit(tra, y=None)
# net.initialize()
net.load_params(f_params='model/params.pt', f_optimizer='model/optimizer.pt', f_history='model/history.json')

Example #19
        callbacks=[
            cp,
            # ('lr_scheduler', schedule),  # Use with SGD optimizer
        ],
        lr=setting['initial_lr'],
        module__input_size=x_train.shape[1],
        module__hidden_layer_sizes=hidden_layer_sizes,
        module__dropout=setting['dropout'],
        module__output_size=output_size,
        criterion=criterion,
        optimizer=torch.optim.Adam,  # torch.optim.SGD,
        batch_size=128,
        warm_start=False,
        verbose=2,
        device='cuda',
        train_split=predefined_split(Dataset(x_val, y_val)),  # holdout val set
        optimizer__weight_decay=setting['wd'],
        # optimizer__momentum=setting['momentum'],  # Use with SGD optimizer
        optimizer__amsgrad=setting['amsgrad'],
    )

    if use_crossval:
        # This script is no longer intended to use cross-validation. Please use
        # the provided val and test sets in EgoCom.
        pass
    else:
        model.fit(x_train, y_train, epochs=epochs)
    print(" * Test Acc (last epoch): {:.6f}".format(model.score(
        x_test, y_test)))
    model.load_params(checkpoint=cp)
    print(" ** Test Acc (best val): {:.6f}".format(model.score(x_test,
Example #20
def main():
    """
    Run an active learning experiment.

    Sample command:
    ```
    python training/run_modAL_experiment.py --al_epochs_init=10 --al_epochs_incr=5 --al_n_iter=10 --al_samples_per_iter=100 --data_class=DroughtWatch --model_class=ResnetClassifier --batch_size=64 --n_train_images=1000 --n_validation_images=1000 --pretrained=True --wandb
    ```
    """

    # generic setup steps from run_experiment
    # ---------------------------------------

    parser = _setup_parser()
    args = parser.parse_args()
    data_class = _import_class(f"active_learning.data.{args.data_class}")
    model_class = _import_class(f"active_learning.models.{args.model_class}")
    data = data_class(args)
    model = model_class(data_config=data.config(), args=args)

    if args.loss == "ctc":
        lit_model_class = lit_models.CTCLitModel
    elif args.loss == "transformer":
        lit_model_class = lit_models.TransformerLitModel
    else:
        lit_model_class = lit_models.BaseLitModel

    if args.load_checkpoint is not None:
        lit_model = lit_model_class.load_from_checkpoint(args.load_checkpoint, args=args, model=model)
    else:
        lit_model = lit_model_class(args=args, model=model)

    # modAL specific experiment setup
    # -------------------------------

    # initialize wandb with pytorch model
    if args.wandb:
        wandb.init(config=args)
        wandb.watch(model, log_freq=100)

    # evaluate query strategy from args parameter
    if args.al_query_strategy in ["uncertainty_sampling", "margin_sampling", "entropy_sampling"]:
        query_strategy = _import_class(f"modAL.uncertainty.{args.al_query_strategy}")
    else:
        query_strategy = _import_class(f"active_learning.sampling.{args.al_query_strategy}")

    # cpu vs. gpu: ignore --gpu args param, instead just set gpu based on availability
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # initialize train, validation and pool datasets
    data.setup()

    X_initial = np.moveaxis(
        data.data_train.data, 3, 1
    )  # shape change: (i, channels, h, w) instead of (i, h, w, channels)
    y_initial = data.data_train.targets
    if args.reduced_develop_train_size:
        print("NOTE: Reduced initial train set size for development activated")
        X_initial = X_initial[:100, :, :, :]
        y_initial = y_initial[:100]

    X_val = np.moveaxis(data.data_val.data, 3, 1)  # shape change
    y_val = data.data_val.targets
    X_pool = np.moveaxis(data.data_unlabelled.data, 3, 1)  # shape change
    y_pool = data.data_unlabelled.targets

    # initialize skorch classifier
    classifier = NeuralNetClassifier(
        model,
        criterion=torch.nn.CrossEntropyLoss,
        optimizer=torch.optim.Adam,
        train_split=predefined_split(Dataset(X_val, y_val)),
        verbose=1,
        device=device,
    )

    lit_model.summarize(mode="full")

    # initialize modal active learner
    print("Initializing model with base training set")
    learner = ActiveLearner(
        estimator=classifier,
        X_training=X_initial,
        y_training=y_initial,
        epochs=args.al_epochs_init,
        query_strategy=query_strategy,
    )

    _log_skorch_history(
        history=learner.estimator.history,
        al_iter=0,
        epoch_start=0,
        train_acc=learner.score(learner.X_training, learner.y_training),
        train_size=len(learner.y_training),
        wandb_logging=args.wandb,
    )

    # active learning loop
    for idx in range(args.al_n_iter):

        print("Active learning query no. %d" % (idx + 1))
        query_idx, _ = learner.query(X_pool, n_instances=args.al_samples_per_iter)
        learner.teach(
            X=X_pool[query_idx], y=y_pool[query_idx], only_new=args.al_incr_onlynew, epochs=args.al_epochs_incr
        )

        _log_skorch_history(
            history=learner.estimator.history,
            al_iter=idx + 1,
            epoch_start=args.al_epochs_init + idx * args.al_epochs_incr,
            train_acc=learner.score(learner.X_training, learner.y_training),
            train_size=len(learner.y_training),
            wandb_logging=args.wandb,
        )

        # remove queried instances from pool
        X_pool = np.delete(X_pool, query_idx, axis=0)
        y_pool = np.delete(y_pool, query_idx, axis=0)
Example #21
File: test_model.py Project: kqf/hubmap
def test_model(fake_dataset):
    dataset = RawDataset(list(fake_dataset.glob("*/")), transform=transform())

    model = build_model(train_split=predefined_split(dataset))
    model.fit(dataset)
    model.thresholds(dataset)
Example #22
def test_variable_length_trials_cropped_decoding():
    cuda = False
    set_random_seeds(seed=20210726, cuda=cuda)

    # create fake tuh abnormal dataset
    tuh = _TUHAbnormalMock(path='')
    # fake variable length trials by cropping first recording
    splits = tuh.split([[i] for i in range(len(tuh.datasets))])
    preprocess(
        concat_ds=splits['0'],
        preprocessors=[
            Preprocessor('crop', tmax=300),
        ],
    )
    variable_tuh = BaseConcatDataset(
        [splits[str(i)] for i in range(len(tuh.datasets))])
    # make sure we actually have different length trials
    assert any(np.diff([ds.raw.n_times for ds in variable_tuh.datasets]) != 0)

    # create windows
    variable_tuh_windows = create_fixed_length_windows(
        concat_ds=variable_tuh,
        window_size_samples=1000,
        window_stride_samples=1000,
        drop_last_window=False,
        mapping={
            True: 1,
            False: 0
        },
    )

    # create train and valid set
    splits = variable_tuh_windows.split(
        [[i] for i in range(len(variable_tuh_windows.datasets))])
    variable_tuh_windows_train = BaseConcatDataset(
        [splits[str(i)] for i in range(len(tuh.datasets) - 1)])
    variable_tuh_windows_valid = BaseConcatDataset(
        [splits[str(len(tuh.datasets) - 1)]])
    # grab a single window to read off the number of channels below
    for x, y, ind in variable_tuh_windows_train:
        break
    train_split = predefined_split(variable_tuh_windows_valid)

    # initialize a model
    model = ShallowFBCSPNet(
        in_chans=x.shape[0],
        n_classes=len(tuh.description.pathological.unique()),
    )
    to_dense_prediction_model(model)
    if cuda:
        model.cuda()

    # create and train a classifier
    clf = EEGClassifier(
        model,
        cropped=True,
        criterion=CroppedLoss,
        criterion__loss_function=torch.nn.functional.nll_loss,
        optimizer=torch.optim.Adam,
        batch_size=32,
        callbacks=['accuracy'],
        train_split=train_split,
    )
    clf.fit(variable_tuh_windows_train, y=None, epochs=3)

    # make sure it does what we expect
    np.testing.assert_allclose(
        clf.history[:, 'train_loss'],
        np.array([
            0.689495325088501,
            0.1353449523448944,
            0.006638816092163324,
        ]),
        rtol=1e-1,
        atol=1e-1,
    )

    np.testing.assert_allclose(
        clf.history[:, 'valid_loss'],
        np.array([
            2.925871,
            3.611423,
            4.23494,
        ]),
        rtol=1e-1,
        atol=1e-1,
    )
Example #23
valid_bal_acc = EpochScoring(scoring='balanced_accuracy',
                             on_train=False,
                             name='valid_bal_acc',
                             lower_is_better=False)
callbacks = [('train_bal_acc', train_bal_acc),
             ('valid_bal_acc', valid_bal_acc)]

clf = EEGClassifier(
    model,
    criterion=torch.nn.CrossEntropyLoss,
    criterion__weight=torch.Tensor(class_weights).to(device),
    optimizer=torch.optim.Adam,
    iterator_train__shuffle=False,
    iterator_train__sampler=train_sampler,
    iterator_valid__sampler=valid_sampler,
    train_split=predefined_split(valid_set),  # using valid_set for validation
    optimizer__lr=lr,
    batch_size=batch_size,
    callbacks=callbacks,
    device=device)
# Model training for a specified number of epochs. `y` is None as it is already
# supplied in the dataset.
clf.fit(train_set, y=None, epochs=n_epochs)

######################################################################
# Plot results
# ------------
#
# We use the history stored by skorch during training to plot the performance
# of the model over training. Specifically, we plot the loss and the balanced
# accuracy for the training and validation sets, as sketched below.
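A minimal plotting sketch, assuming pandas and matplotlib are available; the
column names follow the EpochScoring callbacks defined above.

import matplotlib.pyplot as plt
import pandas as pd

results_columns = ['train_loss', 'valid_loss',
                   'train_bal_acc', 'valid_bal_acc']
df = pd.DataFrame(clf.history[:, results_columns],
                  columns=results_columns,
                  index=clf.history[:, 'epoch'])
df[['train_loss', 'valid_loss']].plot(xlabel='epoch', ylabel='loss')
df[['train_bal_acc', 'valid_bal_acc']].plot(xlabel='epoch',
                                            ylabel='balanced accuracy')
plt.show()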
Example #24
def test_cropped_decoding():
    # 5, 6, 9, 10, 13, 14 are codes for executed and imagined hands/feet
    subject_id = 1
    event_codes = [5, 6, 9, 10, 13, 14]

    # This will download the files if you don't have them yet,
    # and then return the paths to the files.
    physionet_paths = mne.datasets.eegbci.load_data(
        subject_id, event_codes, update_path=False
    )

    # Load each of the files
    parts = [
        mne.io.read_raw_edf(
            path, preload=True, stim_channel="auto", verbose="WARNING"
        )
        for path in physionet_paths
    ]

    # Concatenate them
    raw = concatenate_raws(parts)

    # Find the events in this dataset
    events, _ = mne.events_from_annotations(raw)
    # Use only EEG channels
    eeg_channel_inds = mne.pick_types(
        raw.info, meg=False, eeg=True, stim=False, eog=False, exclude="bads"
    )

    # Extract trials, only using EEG channels
    epoched = mne.Epochs(
        raw,
        events,
        dict(hands=2, feet=3),
        tmin=1,
        tmax=4.1,
        proj=False,
        picks=eeg_channel_inds,
        baseline=None,
        preload=True,
    )
    # Convert data from volt to microvolt
    # PyTorch expects float32 for inputs and int64 for labels.
    X = (epoched.get_data() * 1e6).astype(np.float32)
    y = (epoched.events[:, 2] - 2).astype(np.int64)  # 2,3 -> 0,1

    # Set if you want to use GPU
    # You can also use torch.cuda.is_available() to determine if cuda is available on your machine.
    cuda = False
    set_random_seeds(seed=20170629, cuda=cuda)

    # This will determine how many crops are processed in parallel
    input_time_length = 450
    n_classes = 2
    in_chans = X.shape[1]
    # final_conv_length determines the size of the receptive field of the ConvNet
    model = ShallowFBCSPNet(
        in_chans=in_chans,
        n_classes=n_classes,
        input_time_length=input_time_length,
        final_conv_length=12,
    )
    to_dense_prediction_model(model)

    if cuda:
        model.cuda()

    # Perform forward pass to determine how many outputs per input
    n_preds_per_input = get_output_shape(model, in_chans, input_time_length)[2]

    train_set = CroppedXyDataset(X[:60], y[:60],
                                 input_time_length=input_time_length,
                                 n_preds_per_input=n_preds_per_input)
    valid_set = CroppedXyDataset(X[60:], y=y[60:],
                                 input_time_length=input_time_length,
                                 n_preds_per_input=n_preds_per_input)
    train_split = predefined_split(valid_set)

    clf = EEGClassifier(
        model,
        cropped=True,
        criterion=CroppedLoss,
        criterion__loss_function=torch.nn.functional.nll_loss,
        optimizer=optim.Adam,
        train_split=train_split,
        batch_size=32,
        callbacks=['accuracy'],
    )

    clf.fit(train_set, y=None, epochs=4)

    np.testing.assert_allclose(
        clf.history[:, 'train_loss'],
        np.array(
            [
                1.455306,
                1.455934,
                1.210563,
                1.065806
            ]
        ),
        rtol=1e-4,
        atol=1e-5,
    )

    np.testing.assert_allclose(
        clf.history[:, 'valid_loss'],
        np.array(
            [
                2.547288,
                1.51785,
                1.394036,
                1.064355
            ]
        ),
        rtol=1e-4,
        atol=1e-4,
    )
    np.testing.assert_allclose(
        clf.history[:, 'train_accuracy'],
        np.array(
            [
                0.5,
                0.5,
                0.5,
                0.533333
            ]
        ),
        rtol=1e-4,
        atol=1e-5,
    )
    np.testing.assert_allclose(
        clf.history[:, 'valid_accuracy'],
        np.array(
            [
                0.533333,
                0.466667,
                0.533333,
                0.5
            ]
        ),
        rtol=1e-4,
        atol=1e-5,
    )
Example #25
def train(subject_id):

    print('\n--------------------------------------------------\n')
    print(
        'Training on BCI_IV_2a dataset | Cross-subject | ID: {:02d}\n'.format(
            subject_id))

    ##### subject_range = [subject_id]
    subject_range = list(range(1, 10))  # all nine subjects (cross-subject)

    dataset = MOABBDataset(dataset_name="BNCI2014001",
                           subject_ids=subject_range)

    ######################################################################
    # Preprocessing

    low_cut_hz = 4.  # low cut frequency for filtering
    high_cut_hz = 38.  # high cut frequency for filtering
    # Parameters for exponential moving standardization
    factor_new = 1e-3
    init_block_size = 1000

    preprocessors = [
        Preprocessor('pick_types', eeg=True, eog=False, meg=False,
                     stim=False),  # Keep EEG sensors
        Preprocessor(lambda x: x * 1e6),  # Convert from V to uV
        Preprocessor('filter', l_freq=low_cut_hz,
                     h_freq=high_cut_hz),  # Bandpass filter
        #Preprocessor('set_eeg_reference', ref_channels='average', ch_type='eeg'),
        Preprocessor('resample', sfreq=125),
        Preprocessor(covariance_align),

        ## Preprocessor(exponential_moving_standardize,  # Exponential moving standardization
        ## factor_new=factor_new, init_block_size=init_block_size)
        ## Preprocessor('pick_channels', ch_names=short_ch_names, ordered=True),
    ]

    # Transform the data
    print('Preprocessing dataset\n')
    preprocess(dataset, preprocessors)

    ######################################################################
    # Cut Compute Windows
    # ~~~~~~~~~~~~~~~~~~~

    trial_start_offset_seconds = -0.5
    trial_stop_offset_seconds = 0.0
    # Extract sampling frequency, check that they are same in all datasets
    sfreq = dataset.datasets[0].raw.info['sfreq']
    assert all([ds.raw.info['sfreq'] == sfreq for ds in dataset.datasets])
    # Calculate the trial start offset in samples.
    trial_start_offset_samples = int(trial_start_offset_seconds * sfreq)
    trial_stop_offset_samples = int(trial_stop_offset_seconds * sfreq)

    # Create windows using braindecode function for this. It needs parameters to define how
    # trials should be used.
    print('Windowing dataset\n')
    windows_dataset = create_windows_from_events(
        dataset,
        # picks=["Fz", "FC3", "FC1", "FCz", "FC2", "FC4", "C5", "C3", "C1", "Cz", "C2", "C4", "C6", "CP3", "CP1", "CPz", "CP2", "CP4", "P1", "Pz", "P2", "POz"],
        trial_start_offset_samples=trial_start_offset_samples,
        trial_stop_offset_samples=trial_stop_offset_samples,
        preload=True,
    )

    print('Computing covariances of each WindowsDataset')
    windows_dataset.compute_covariances_concat()

    # print(windows_dataset.datasets[0].windows)

    ######################################################################
    # Merge multiple datasets into a single WindowDataset
    # metadata_all = [ds.windows.metadata for ds in windows_dataset.datasets]
    # metadata_full = pd.concat(metadata_all)
    """
	epochs_all = [ds.windows for ds in windows_dataset.datasets]
	epochs_full = mne.concatenate_epochs(epochs_all)
	full_dataset = WindowsDataset(windows=epochs_full, description=None, transform=None)
	windows_dataset = full_dataset
	"""
    ######################################################################
    # Split dataset into train and valid

    # keep only session 1:
    # temp = windows_dataset.split( 'session' )
    # windows_dataset = temp['session_T']

    # print(windows_dataset.datasets[0].windows)
    # print(windows_dataset.datasets[0].windows.get_data().shape)
    # quit()

    subject_column = windows_dataset.description['subject'].values
    inds_train = list(np.where(subject_column != subject_id)[0])
    inds_valid = list(np.where(subject_column == subject_id)[0])
    splitted = windows_dataset.split([inds_train, inds_valid])
    train_set = splitted['0']
    valid_set = splitted['1']

    #######

    epochs_all = [ds.windows for ds in train_set.datasets]
    epochs_full = mne.concatenate_epochs(epochs_all)
    trialwise_weights_all = [ds.trialwise_weights for ds in train_set.datasets]
    trialwise_weights_full = np.hstack(trialwise_weights_all)
    full_dataset = WindowsDataset(windows=epochs_full,
                                  description=None,
                                  transform=None)
    full_dataset.trialwise_weights = trialwise_weights_full
    train_set = full_dataset
    # print(train_set.windows.metadata)
    ######################################################################
    # Create model

    # Check if a GPU is available; if so, use it
    cuda = torch.cuda.is_available()
    device = 'cuda' if cuda else 'cpu'
    if cuda:
        torch.backends.cudnn.benchmark = True
    seed = 20200220  # random seed to make results reproducible
    # Set random seed to be able to reproduce results
    set_random_seeds(seed=seed, cuda=cuda)

    n_classes = 4
    # Extract number of chans and time steps from dataset
    n_chans = train_set[0][0].shape[0]
    input_window_samples = train_set[0][0].shape[1]
    """
	model = ShallowFBCSPNet(
		n_chans,
		n_classes,
		input_window_samples=input_window_samples,
		final_conv_length='auto')
	"""
    """
	model = EEGNetv1(
			n_chans,
			n_classes,
			input_window_samples=input_window_samples,
			final_conv_length="auto",
			pool_mode="mean",
			second_kernel_size=(2, 32),
			third_kernel_size=(8, 4),
			drop_prob=0.25)
	"""
    """
	model = HybridNet(n_chans, n_classes,
					input_window_samples=input_window_samples)
	"""
    """
	model = TCN(n_chans, n_classes,
				n_blocks=6,
				n_filters=32,
				kernel_size=9,
				drop_prob=0.0,
				add_log_softmax=True)
	"""

    model = EEGNetv4(
        n_chans,
        n_classes,
        input_window_samples=input_window_samples,
        final_conv_length="auto",
        pool_mode="mean",
        F1=8,
        D=2,
        F2=16,  # usually set to F1*D (?)
        kernel_length=64,
        third_kernel_size=(8, 4),
        drop_prob=0.2)

    if cuda:
        model.cuda()

    ######################################################################
    # Training

    # These values we found good for shallow network:
    lr = 0.01  # 0.0625 * 0.01
    weight_decay = 0.0005

    # For deep4 they should be:
    # lr = 1 * 0.01
    # weight_decay = 0.5 * 0.001

    batch_size = 64
    n_epochs = 100

    # clf = EEGClassifier(
    clf = EEGClassifier_weighted(
        model,
        criterion=torch.nn.NLLLoss,
        optimizer=torch.optim.SGD,  #AdamW,
        train_split=predefined_split(
            valid_set),  # using valid_set for validation
        optimizer__lr=lr,
        optimizer__momentum=0.9,
        optimizer__weight_decay=weight_decay,
        batch_size=batch_size,
        callbacks=[
            "accuracy",
            # ("lr_scheduler", LRScheduler('CosineAnnealingLR', T_max=n_epochs - 1)),
        ],
        device=device,
    )
    # Model training for a specified number of epochs. `y` is None as it is already supplied
    # in the dataset.
    clf.fit(train_set, y=None, epochs=n_epochs)

    results_columns = [
        'train_loss', 'valid_loss', 'train_accuracy', 'valid_accuracy'
    ]
    df = pd.DataFrame(clf.history[:, results_columns],
                      columns=results_columns,
                      index=clf.history[:, 'epoch'])

    val_accs = df['valid_accuracy'].values
    max_val_acc = 100.0 * np.max(val_accs)

    return max_val_acc
Example #26
            module__hidden_dim=opt['hidden_layer_dim'],
            optimizer__weight_decay=opt['l2_weight'],
            module__dropout=opt['dropout'],
            device='cuda',
            # Training
            max_epochs=opt['max_epochs'],
            batch_size=opt['batch_size'],
            callbacks=[
                Checkpoint(dirname=save_dir,
                           f_params='params.pt',
                           f_optimizer=None,
                           f_history=None,
                           monitor='valid_loss_best')
            ],
            # train_split is validation data
            train_split=predefined_split(Dataset(X_val, y_val)),
            # Optimizer
            optimizer=optim.Adam,
            lr=opt['learning_rate'],
            # Data
            iterator_train__shuffle=True,
            verbose=(runs == 1))

        net.fit(X_train, y_train)

        # Reload best valid loss checkpoint
        net.load_params(save_dir.joinpath('params.pt'))

        # Evaluate
        preds = net.predict(X_train)
        train_acc = accuracy_score(y_train, preds)
Example #27
0
def test_trialwise_decoding():
    # 5, 6, 9, 10, 13, 14 are codes for executed and imagined hands/feet
    subject_id = 1
    event_codes = [5, 6, 9, 10, 13, 14]

    # This will download the files if you don't have them yet,
    # and then return the paths to the files.
    physionet_paths = mne.datasets.eegbci.load_data(subject_id,
                                                    event_codes,
                                                    update_path=False)

    # Load each of the files
    parts = [
        mne.io.read_raw_edf(path,
                            preload=True,
                            stim_channel="auto",
                            verbose="WARNING") for path in physionet_paths
    ]

    # Concatenate them
    raw = concatenate_raws(parts)
    raw.apply_function(lambda x: x * 1000000)  # convert from volts to microvolts

    # Find the events in this dataset
    events, _ = mne.events_from_annotations(raw)
    # Use only EEG channels
    eeg_channel_inds = mne.pick_types(raw.info,
                                      meg=False,
                                      eeg=True,
                                      stim=False,
                                      eog=False,
                                      exclude="bads")

    # Extract trials, only using EEG channels
    epoched = mne.Epochs(
        raw,
        events,
        dict(hands=2, feet=3),
        tmin=1,
        tmax=4.1,
        proj=False,
        picks=eeg_channel_inds,
        baseline=None,
        preload=True,
    )

    ds = EpochsDataset(epoched)

    train_set = Subset(ds, np.arange(60))
    valid_set = Subset(ds, np.arange(60, len(ds)))

    train_valid_split = predefined_split(valid_set)  # validate on valid_set as-is

    cuda = False
    if cuda:
        device = 'cuda'
    else:
        device = 'cpu'
    set_random_seeds(seed=20170629, cuda=cuda)
    n_classes = 2
    in_chans = train_set[0][0].shape[0]
    input_time_length = train_set[0][0].shape[1]
    model = ShallowFBCSPNet(
        in_chans=in_chans,
        n_classes=n_classes,
        input_time_length=input_time_length,
        final_conv_length="auto",
    )
    if cuda:
        model.cuda()

    clf = EEGClassifier(
        model,
        cropped=False,
        criterion=torch.nn.NLLLoss,
        optimizer=torch.optim.Adam,
        train_split=train_valid_split,
        optimizer__lr=0.001,
        batch_size=30,
        callbacks=["accuracy"],
        device=device,
    )
    clf.fit(train_set, y=None, epochs=6)

    np.testing.assert_allclose(
        clf.history[:, 'train_loss'],
        np.array([
            1.1114974617958069, 1.0976492166519165, 0.668171226978302,
            0.5880511999130249, 0.7054798305034637, 0.5272344648838043
        ]),
        rtol=1e-4,
        atol=1e-5,
    )
    np.testing.assert_allclose(
        clf.history[:, 'valid_loss'],
        np.array([
            0.8467752933502197, 0.9804958701133728, 0.9134824872016907,
            0.8305345773696899, 0.8263336420059204, 0.8535978198051453
        ]),
        rtol=1e-4,
        atol=1e-5,
    )
    np.testing.assert_allclose(
        clf.history[:, 'train_accuracy'],
        np.array([
            0.7166666666666667, 0.6666666666666666, 0.85, 0.9333333333333333,
            0.9166666666666666, 0.9
        ]),
        rtol=1e-4,
        atol=1e-5,
    )
    np.testing.assert_allclose(
        clf.history[:, 'valid_accuracy'],
        np.array([
            0.6, 0.5666666666666667, 0.5333333333333333, 0.5333333333333333,
            0.6, 0.6666666666666666
        ]),
        rtol=1e-4,
        atol=1e-5,
    )
def exp(subject_id):
    import torch
    test_subj = np.r_[subject_id]
    print('test subj: ' + str(test_subj))
    # train_subj = np.setdiff1d(np.r_[1:10], test_subj)
    train_subj = np.setdiff1d(np.r_[1, 3, 7, 8], test_subj)

    tr = []
    val = []
    for ids in train_subj:
        # hold out 1% of each training subject's windows for validation
        train_size = int(0.99 * len(splitted[ids]))
        val_size = len(splitted[ids]) - train_size
        tr_i, val_i = torch.utils.data.random_split(splitted[ids],
                                                    [train_size, val_size])
        tr.append(tr_i)
        val.append(val_i)

    train_set = torch.utils.data.ConcatDataset(tr)
    valid_set = torch.utils.data.ConcatDataset(val)
    # Note: the line above is overridden; validation uses the held-out test subject
    valid_set = BaseConcatDataset([splitted[ids] for ids in test_subj])

    ######################################################################
    # Create model
    # ------------
    #

    ######################################################################
    # Now we create the deep learning model! Braindecode comes with some
    # predefined convolutional neural network architectures for raw
    # time-domain EEG. Here, we use the shallow ConvNet model from `Deep
    # learning with convolutional neural networks for EEG decoding and
    # visualization <https://arxiv.org/abs/1703.05051>`__. These models are
    # pure `PyTorch <https://pytorch.org>`__ deep learning models, therefore
    # to use your own model, it just has to be a normal PyTorch
    # `nn.Module <https://pytorch.org/docs/stable/nn.html#torch.nn.Module>`__.
    #
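
    ######################################################################
    # As a minimal sketch of that point: any plain ``nn.Module`` mapping a
    # (batch, n_chans, n_times) tensor to class log-probabilities can be
    # handed to EEGClassifier. ``MyLinearEEGNet`` below is a hypothetical
    # name, not part of braindecode:
    #
    # import torch.nn as nn
    #
    # class MyLinearEEGNet(nn.Module):
    #     def __init__(self, n_chans, n_classes, input_window_samples):
    #         super().__init__()
    #         self.flatten = nn.Flatten()
    #         self.fc = nn.Linear(n_chans * input_window_samples, n_classes)
    #         self.logsoftmax = nn.LogSoftmax(dim=1)
    #
    #     def forward(self, x):
    #         # x: (batch, n_chans, n_times) -> (batch, n_classes)
    #         return self.logsoftmax(self.fc(self.flatten(x)))
    #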

    from braindecode.util import set_random_seeds
    from braindecode.models import ShallowFBCSPNet, Deep4Net

    # check if a GPU is available; if so, use it
    cuda = torch.cuda.is_available()
    device = 'cuda:0' if cuda else 'cpu'
    if cuda:
        torch.backends.cudnn.benchmark = True
    seed = 20200220  # random seed to make results reproducible
    # Set random seed to be able to reproduce results
    set_random_seeds(seed=seed, cuda=cuda)

    n_classes = 3
    # Extract number of chans and time steps from dataset
    n_chans = train_set[0][0].shape[0]
    input_window_samples = train_set[0][0].shape[1]
    #
    # model = ShallowFBCSPNet(
    #     n_chans,
    #     n_classes,
    #     input_window_samples=input_window_samples,
    #     final_conv_length='auto',
    # )

    from mynetworks import Deep4Net_origin, ConvClfNet, FcClfNet  # only needed for the commented-out variant below

    model = Deep4Net(
        n_chans,
        n_classes,
        input_window_samples=input_window_samples,
        final_conv_length="auto",
    )

    # Alternative: wrap an embedding network in a fully-connected classifier
    # embedding_net = Deep4Net_origin(4, 22, input_window_samples)
    # model = FcClfNet(embedding_net)

    print(model)

    # Send model to GPU
    if cuda:
        model.cuda()

    ######################################################################
    # Training
    # --------
    #

    ######################################################################
    # Now we train the network! EEGClassifier is a Braindecode object
    # responsible for managing the training of neural networks. It inherits
    # from skorch.NeuralNetClassifier, so the training logic is the same as in
    # `Skorch <https://skorch.readthedocs.io/en/stable/>`__.
    #
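
    ######################################################################
    # Because of that inheritance, the usual skorch idioms carry over
    # unchanged once the classifier below is fitted (a sketch, not run
    # here):
    #
    # clf.set_params(optimizer__lr=1e-3)    # reconfigure like any skorch net
    # y_pred = clf.predict(valid_set)       # sklearn-style predictions
    # clf.save_params(f_params='model.pt')  # persist the module weights
    #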

    ######################################################################
    #    **Note**: In this tutorial, we use some default parameters that we
    #    have found to work well for motor decoding, however we strongly
    #    encourage you to perform your own hyperparameter optimization using
    #    cross validation on your training data.
    #
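
    ######################################################################
    # One way to run such a search (a sketch, not executed here): the
    # classifier follows the sklearn estimator API, so it can be dropped
    # into GridSearchCV; setting ``train_split=None`` lets the CV object
    # own the splits. Array-like ``X_train`` / ``y_train`` are assumed:
    #
    # from sklearn.model_selection import GridSearchCV
    #
    # param_grid = {'optimizer__lr': [1e-4, 1e-3], 'batch_size': [8, 16]}
    # search = GridSearchCV(clf, param_grid, cv=3)
    # search.fit(X_train, y_train)
    #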

    from skorch.callbacks import LRScheduler
    from skorch.helper import predefined_split

    from braindecode import EEGClassifier
    # These values we found good for the shallow network:
    lr = 0.0625 * 0.01
    weight_decay = 0

    # For deep4 they should be:
    # lr = 1 * 0.01
    # weight_decay = 0.5 * 0.001

    batch_size = 8
    n_epochs = 100

    clf = EEGClassifier(
        model,
        criterion=torch.nn.NLLLoss,
        optimizer=torch.optim.AdamW,
        train_split=predefined_split(
            valid_set),  # using valid_set for validation
        optimizer__lr=lr,
        optimizer__weight_decay=weight_decay,
        batch_size=batch_size,
        callbacks=[
            "accuracy",
            ("lr_scheduler",
             LRScheduler('CosineAnnealingLR', T_max=n_epochs - 1)),
        ],
        device=device,
    )
    # Model training for a specified number of epochs. `y` is None as it is already supplied
    # in the dataset.
    clf.fit(train_set, y=None, epochs=n_epochs)

    ######################################################################
    # Plot Results
    # ------------
    #

    ######################################################################
    # Now we use the history stored by Skorch throughout training to plot
    # accuracy and loss curves.
    #

    import matplotlib.pyplot as plt
    from matplotlib.lines import Line2D
    import pandas as pd
    # Extract loss and accuracy values for plotting from history object
    results_columns = [
        'train_loss', 'valid_loss', 'train_accuracy', 'valid_accuracy'
    ]
    df = pd.DataFrame(clf.history[:, results_columns],
                      columns=results_columns,
                      index=clf.history[:, 'epoch'])

    # compute misclassification rate (%) for easier visual comparison with the loss
    df = df.assign(train_misclass=100 - 100 * df.train_accuracy,
                   valid_misclass=100 - 100 * df.valid_accuracy)

    plt.style.use('seaborn')  # renamed 'seaborn-v0_8' in matplotlib >= 3.6
    fig, ax1 = plt.subplots(figsize=(8, 3))
    df.loc[:, ['train_loss', 'valid_loss']].plot(ax=ax1,
                                                 style=['-', ':'],
                                                 marker='o',
                                                 color='tab:blue',
                                                 legend=False,
                                                 fontsize=14)

    ax1.tick_params(axis='y', labelcolor='tab:blue', labelsize=14)
    ax1.set_ylabel("Loss", color='tab:blue', fontsize=14)

    ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

    df.loc[:, ['train_misclass', 'valid_misclass']].plot(ax=ax2,
                                                         style=['-', ':'],
                                                         marker='o',
                                                         color='tab:red',
                                                         legend=False)
    ax2.tick_params(axis='y', labelcolor='tab:red', labelsize=14)
    ax2.set_ylabel("Misclassification Rate [%]", color='tab:red', fontsize=14)
    ax2.set_ylim(ax2.get_ylim()[0], 85)  # make some room for legend
    ax1.set_xlabel("Epoch", fontsize=14)

    # Build a shared legend with proxy artists (solid = train, dotted = valid)
    handles = []
    handles.append(
        Line2D([0], [0],
               color='black',
               linewidth=1,
               linestyle='-',
               label='Train'))
    handles.append(
        Line2D([0], [0],
               color='black',
               linewidth=1,
               linestyle=':',
               label='Valid'))
    plt.legend(handles, [h.get_label() for h in handles], fontsize=14)
    plt.tight_layout()

    # plt.show()

    return df
#     window_stride_samples=n_preds_per_input,
#     drop_last_window=False,
#     drop_bad_windows=True,
# )

# splits = dataset.split("session")
# train_set = splits["train"]
# valid_set = splits["eval"]

regressor = EEGRegressor(
    model,
    cropped=True,
    criterion=CroppedLoss,
    criterion__loss_function=torch.nn.functional.mse_loss,
    optimizer=torch.optim.AdamW,
    train_split=predefined_split(valid_set),
    optimizer__lr=optimizer_lr,
    optimizer__weight_decay=optimizer_weight_decay,
    iterator_train__shuffle=True,
    batch_size=batch_size,
    callbacks=[
        "neg_root_mean_squared_error",
        # T_max = n_epochs - 1 so the learning rate anneals to ~0 by the final epoch
        ("lr_scheduler", LRScheduler('CosineAnnealingLR', T_max=n_epochs - 1)),
    ],
    device=device,
)

regressor.fit(train_set, y=None, epochs=n_epochs)
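
# After fitting, per-window predictions follow the usual skorch API
# (a sketch, assuming the fitted regressor above):
# preds = regressor.predict(valid_set)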

if __name__ == '__main__':
Example #30
0
def test_trialwise_decoding():
    # 5, 6, 9, 10, 13, 14 are codes for executed and imagined hands/feet
    subject_id = 1
    event_codes = [5, 6, 9, 10, 13, 14]

    # This will download the files if you don't have them yet,
    # and then return the paths to the files.
    physionet_paths = mne.datasets.eegbci.load_data(subject_id,
                                                    event_codes,
                                                    update_path=False)

    # Load each of the files
    parts = [
        mne.io.read_raw_edf(path,
                            preload=True,
                            stim_channel="auto",
                            verbose="WARNING") for path in physionet_paths
    ]

    # Concatenate them
    raw = concatenate_raws(parts)
    raw.apply_function(lambda x: x * 1000000)  # convert from volts to microvolts

    # Find the events in this dataset
    events, _ = mne.events_from_annotations(raw)
    # Use only EEG channels
    eeg_channel_inds = mne.pick_types(raw.info,
                                      meg=False,
                                      eeg=True,
                                      stim=False,
                                      eog=False,
                                      exclude="bads")

    # Extract trials, only using EEG channels
    epoched = mne.Epochs(
        raw,
        events,
        dict(hands=2, feet=3),
        tmin=1,
        tmax=4.1,
        proj=False,
        picks=eeg_channel_inds,
        baseline=None,
        preload=True,
    )

    ds = EpochsDataset(epoched)

    train_set = Subset(ds, np.arange(60))
    valid_set = Subset(ds, np.arange(60, len(ds)))

    train_valid_split = predefined_split(valid_set)

    cuda = False
    if cuda:
        device = 'cuda'
    else:
        device = 'cpu'
    set_random_seeds(seed=20170629, cuda=cuda)
    n_classes = 2
    in_chans = train_set[0][0].shape[0]
    input_window_samples = train_set[0][0].shape[1]
    model = ShallowFBCSPNet(
        in_chans=in_chans,
        n_classes=n_classes,
        input_window_samples=input_window_samples,
        final_conv_length="auto",
    )
    if cuda:
        model.cuda()

    clf = EEGClassifier(
        model,
        cropped=False,
        criterion=torch.nn.NLLLoss,
        optimizer=torch.optim.Adam,
        train_split=train_valid_split,
        optimizer__lr=0.001,
        batch_size=30,
        callbacks=["accuracy"],
        device=device,
    )
    clf.fit(train_set, y=None, epochs=6)

    np.testing.assert_allclose(
        clf.history[:, 'train_loss'],
        np.array([
            1.1114967465400696, 1.0180627405643463, 0.8020123243331909,
            0.8934760391712189, 0.8401200771331787, 0.5898805856704712
        ]),
        rtol=1e-4,
        atol=1e-5,
    )
    np.testing.assert_allclose(
        clf.history[:, 'valid_loss'],
        np.array([
            0.8467752933502197, 1.0855580568313599, 0.873993992805481,
            0.8403236865997314, 0.8534432053565979, 0.8854812383651733
        ]),
        rtol=1e-4,
        atol=1e-5,
    )
    np.testing.assert_allclose(
        clf.history[:, 'train_accuracy'],
        np.array(
            [0.7166666666666667, 0.6666666666666666, 0.8, 0.9, 0.95, 0.95]),
        rtol=1e-4,
        atol=1e-5,
    )
    np.testing.assert_allclose(
        clf.history[:, 'valid_accuracy'],
        np.array([
            0.6, 0.5666666666666667, 0.5666666666666667, 0.5,
            0.5333333333333333, 0.6333333333333333
        ]),
        rtol=1e-4,
        atol=1e-5,
    )