def test_exponential_running_demean(mock_data):
    mock_input, _, expected_data = mock_data
    demeaned_data = exponential_running_demean(mock_input)
    assert mock_input.shape == demeaned_data.shape == expected_data.shape
    np.testing.assert_allclose(demeaned_data,
                               expected_data,
                               rtol=1e-4,
                               atol=1e-4)
def test_exponential_running_init_block_size(mock_data):
    mock_input, _, _ = mock_data
    init_block_size = 3
    standardized_data = exponential_running_standardize(
        mock_input, init_block_size=init_block_size)
    np.testing.assert_allclose(standardized_data[:, :init_block_size].sum(),
                               [0],
                               rtol=1e-4,
                               atol=1e-4)

    demeaned_data = exponential_running_demean(mock_input,
                                               init_block_size=init_block_size)
    np.testing.assert_allclose(demeaned_data[:, :init_block_size].sum(), [0],
                               rtol=1e-4,
                               atol=1e-4)
def run_exp(max_recording_mins, n_recordings, sec_to_cut,
            duration_recording_mins, max_abs_val, max_min_threshold,
            max_min_expected, shrink_val, max_min_remove, batch_set_zero_val,
            batch_set_zero_test, sampling_freq, low_cut_hz, high_cut_hz,
            exp_demean, exp_standardize, moving_demean, moving_standardize,
            channel_demean, channel_standardize, divisor, n_folds, i_test_fold,
            input_time_length, final_conv_length, pool_stride, n_blocks_to_add,
            sigmoid, model_constraint, batch_size, max_epochs,
            only_return_exp):
    cuda = True

    preproc_functions = []
    preproc_functions.append(lambda data, fs: (
        data[:, int(sec_to_cut * fs):-int(sec_to_cut * fs)], fs))
    preproc_functions.append(lambda data, fs: (data[:, :int(
        duration_recording_mins * 60 * fs)], fs))
    if max_abs_val is not None:
        preproc_functions.append(
            lambda data, fs: (np.clip(data, -max_abs_val, max_abs_val), fs))
    if max_min_threshold is not None:
        preproc_functions.append(lambda data, fs: (clean_jumps(
            data, 200, max_min_threshold, max_min_expected, cuda), fs))
    if max_min_remove is not None:
        window_len = 200
        preproc_functions.append(lambda data, fs: (set_jumps_to_zero(
            data,
            window_len=window_len,
            threshold=max_min_remove,
            cuda=cuda,
            clip_min_max_to_zero=True), fs))

    if shrink_val is not None:
        preproc_functions.append(lambda data, fs: (shrink_spikes(
            data,
            shrink_val,
            1,
            9,
        ), fs))

    preproc_functions.append(lambda data, fs: (resampy.resample(
        data, fs, sampling_freq, axis=1, filter='kaiser_fast'), sampling_freq))
    preproc_functions.append(lambda data, fs: (bandpass_cnt(
        data, low_cut_hz, high_cut_hz, fs, filt_order=4, axis=1), fs))

    if exp_demean:
        preproc_functions.append(lambda data, fs: (exponential_running_demean(
            data.T, factor_new=0.001, init_block_size=100).T, fs))
    if exp_standardize:
        preproc_functions.append(
            lambda data, fs: (exponential_running_standardize(
                data.T, factor_new=0.001, init_block_size=100).T, fs))
    if moving_demean:
        preproc_functions.append(lambda data, fs: (padded_moving_demean(
            data, axis=1, n_window=201), fs))
    if moving_standardize:
        preproc_functions.append(lambda data, fs: (padded_moving_standardize(
            data, axis=1, n_window=201), fs))
    if channel_demean:
        preproc_functions.append(lambda data, fs: (demean(data, axis=1), fs))
    if channel_standardize:
        preproc_functions.append(lambda data, fs:
                                 (standardize(data, axis=1), fs))
    if divisor is not None:
        preproc_functions.append(lambda data, fs: (data / divisor, fs))

    dataset = DiagnosisSet(n_recordings=n_recordings,
                           max_recording_mins=max_recording_mins,
                           preproc_functions=preproc_functions)
    if not only_return_exp:
        X, y = dataset.load()

    splitter = Splitter(
        n_folds,
        i_test_fold,
    )
    if not only_return_exp:
        train_set, valid_set, test_set = splitter.split(X, y)
        del X, y  # shouldn't be necessary, but just to make sure
    else:
        train_set = None
        valid_set = None
        test_set = None

    set_random_seeds(seed=20170629, cuda=cuda)
    if sigmoid:
        n_classes = 1
    else:
        n_classes = 2
    in_chans = 21

    net = Deep4Net(
        in_chans=in_chans,
        n_classes=n_classes,
        input_time_length=input_time_length,
        final_conv_length=final_conv_length,
        pool_time_length=pool_stride,
        pool_time_stride=pool_stride,
        n_filters_2=50,
        n_filters_3=80,
        n_filters_4=120,
    )
    model = net_with_more_layers(net, n_blocks_to_add, nn.MaxPool2d)
    if sigmoid:
        model = to_linear_plus_minus_net(model)
    optimizer = optim.Adam(model.parameters())
    to_dense_prediction_model(model)
    log.info("Model:\n{:s}".format(str(model)))
    if cuda:
        model.cuda()
    # determine output size
    test_input = np_to_var(
        np.ones((2, in_chans, input_time_length, 1), dtype=np.float32))
    if cuda:
        test_input = test_input.cuda()
    out = model(test_input)
    n_preds_per_input = out.cpu().data.numpy().shape[2]
    log.info("{:d} predictions per input/trial".format(n_preds_per_input))
    iterator = CropsFromTrialsIterator(batch_size=batch_size,
                                       input_time_length=input_time_length,
                                       n_preds_per_input=n_preds_per_input)
    if sigmoid:
        loss_function = lambda preds, targets: binary_cross_entropy_with_logits(
            th.mean(preds, dim=2)[:, 1, 0], targets.type_as(preds))
    else:
        loss_function = lambda preds, targets: F.nll_loss(
            th.mean(preds, dim=2)[:, :, 0], targets)

    if model_constraint is not None:
        model_constraint = MaxNormDefaultConstraint()
    monitors = [
        LossMonitor(),
        MisclassMonitor(col_suffix='sample_misclass'),
        CroppedTrialMisclassMonitor(input_time_length),
        RuntimeMonitor(),
    ]
    stop_criterion = MaxEpochs(max_epochs)
    batch_modifier = None
    if batch_set_zero_val is not None:
        batch_modifier = RemoveMinMaxDiff(batch_set_zero_val,
                                          clip_max_abs=True,
                                          set_zero=True)
    if (batch_set_zero_val is not None) and (batch_set_zero_test == True):
        iterator = ModifiedIterator(
            iterator,
            batch_modifier,
        )
        batch_modifier = None
    exp = Experiment(model,
                     train_set,
                     valid_set,
                     test_set,
                     iterator,
                     loss_function,
                     optimizer,
                     model_constraint,
                     monitors,
                     stop_criterion,
                     remember_best_column='valid_misclass',
                     run_after_early_stop=True,
                     batch_modifier=batch_modifier,
                     cuda=cuda)
    if not only_return_exp:
        exp.run()
    else:
        exp.dataset = dataset
        exp.splitter = splitter

    return exp
Exemple #4
0
def run_exp(max_recording_mins, n_recordings, sec_to_cut,
            duration_recording_mins, max_abs_val, max_min_threshold,
            max_min_expected, shrink_val, max_min_remove, batch_set_zero_val,
            batch_set_zero_test, sampling_freq, low_cut_hz, high_cut_hz,
            exp_demean, exp_standardize, moving_demean, moving_standardize,
            channel_demean, channel_standardize, divisor, n_folds, i_test_fold,
            model_name, input_time_length, final_conv_length, batch_size,
            max_epochs, only_return_exp):
    cuda = True

    preproc_functions = []
    preproc_functions.append(lambda data, fs: (
        data[:, int(sec_to_cut * fs):-int(sec_to_cut * fs)], fs))
    preproc_functions.append(lambda data, fs: (data[:, :int(
        duration_recording_mins * 60 * fs)], fs))
    if max_abs_val is not None:
        preproc_functions.append(
            lambda data, fs: (np.clip(data, -max_abs_val, max_abs_val), fs))
    if max_min_threshold is not None:
        preproc_functions.append(lambda data, fs: (clean_jumps(
            data, 200, max_min_threshold, max_min_expected, cuda), fs))
    if max_min_remove is not None:
        window_len = 200
        preproc_functions.append(lambda data, fs: (set_jumps_to_zero(
            data,
            window_len=window_len,
            threshold=max_min_remove,
            cuda=cuda,
            clip_min_max_to_zero=True), fs))

    if shrink_val is not None:
        preproc_functions.append(lambda data, fs: (shrink_spikes(
            data,
            shrink_val,
            1,
            9,
        ), fs))

    preproc_functions.append(lambda data, fs: (resampy.resample(
        data, fs, sampling_freq, axis=1, filter='kaiser_fast'), sampling_freq))
    preproc_functions.append(lambda data, fs: (bandpass_cnt(
        data, low_cut_hz, high_cut_hz, fs, filt_order=4, axis=1), fs))

    if exp_demean:
        preproc_functions.append(lambda data, fs: (exponential_running_demean(
            data.T, factor_new=0.001, init_block_size=100).T, fs))
    if exp_standardize:
        preproc_functions.append(
            lambda data, fs: (exponential_running_standardize(
                data.T, factor_new=0.001, init_block_size=100).T, fs))
    if moving_demean:
        preproc_functions.append(lambda data, fs: (padded_moving_demean(
            data, axis=1, n_window=201), fs))
    if moving_standardize:
        preproc_functions.append(lambda data, fs: (padded_moving_standardize(
            data, axis=1, n_window=201), fs))
    if channel_demean:
        preproc_functions.append(lambda data, fs: (demean(data, axis=1), fs))
    if channel_standardize:
        preproc_functions.append(lambda data, fs:
                                 (standardize(data, axis=1), fs))
    if divisor is not None:
        preproc_functions.append(lambda data, fs: (data / divisor, fs))

    all_file_names, labels = get_all_sorted_file_names_and_labels()
    lengths = np.load(
        '/home/schirrmr/code/auto-diagnosis/sorted-recording-lengths.npy')
    mask = lengths < max_recording_mins * 60
    cleaned_file_names = np.array(all_file_names)[mask]
    cleaned_labels = labels[mask]

    diffs_per_rec = np.load(
        '/home/schirrmr/code/auto-diagnosis/diffs_per_recording.npy')

    def create_set(inds):
        X = []
        for i in inds:
            log.info("Load {:s}".format(cleaned_file_names[i]))
            x = load_data(cleaned_file_names[i], preproc_functions)
            X.append(x)
        y = cleaned_labels[inds].astype(np.int64)
        return SignalAndTarget(X, y)

    if not only_return_exp:
        folds = get_balanced_batches(n_recordings,
                                     None,
                                     False,
                                     n_batches=n_folds)
        test_inds = folds[i_test_fold]
        valid_inds = folds[i_test_fold - 1]
        all_inds = list(range(n_recordings))
        train_inds = np.setdiff1d(all_inds, np.union1d(test_inds, valid_inds))

        rec_nr_sorted_by_diff = np.argsort(diffs_per_rec)[::-1]
        train_inds = rec_nr_sorted_by_diff[train_inds]
        valid_inds = rec_nr_sorted_by_diff[valid_inds]
        test_inds = rec_nr_sorted_by_diff[test_inds]

        train_set = create_set(train_inds)
        valid_set = create_set(valid_inds)
        test_set = create_set(test_inds)
    else:
        train_set = None
        valid_set = None
        test_set = None

    set_random_seeds(seed=20170629, cuda=cuda)
    # This will determine how many crops are processed in parallel
    n_classes = 2
    in_chans = 21
    if model_name == 'shallow':
        model = ShallowFBCSPNet(
            in_chans=in_chans,
            n_classes=n_classes,
            input_time_length=input_time_length,
            final_conv_length=final_conv_length).create_network()
    elif model_name == 'deep':
        model = Deep4Net(in_chans,
                         n_classes,
                         input_time_length=input_time_length,
                         final_conv_length=final_conv_length).create_network()

    optimizer = optim.Adam(model.parameters())
    to_dense_prediction_model(model)
    log.info("Model:\n{:s}".format(str(model)))
    if cuda:
        model.cuda()
    # determine output size
    test_input = np_to_var(
        np.ones((2, in_chans, input_time_length, 1), dtype=np.float32))
    if cuda:
        test_input = test_input.cuda()
    out = model(test_input)
    n_preds_per_input = out.cpu().data.numpy().shape[2]
    log.info("{:d} predictions per input/trial".format(n_preds_per_input))
    iterator = CropsFromTrialsIterator(batch_size=batch_size,
                                       input_time_length=input_time_length,
                                       n_preds_per_input=n_preds_per_input)
    loss_function = lambda preds, targets: F.nll_loss(
        th.mean(preds, dim=2)[:, :, 0], targets)
    model_constraint = None
    monitors = [
        LossMonitor(),
        MisclassMonitor(col_suffix='sample_misclass'),
        CroppedTrialMisclassMonitor(input_time_length),
        RuntimeMonitor(),
    ]
    stop_criterion = MaxEpochs(max_epochs)
    batch_modifier = None
    if batch_set_zero_val is not None:
        batch_modifier = RemoveMinMaxDiff(batch_set_zero_val,
                                          clip_max_abs=True,
                                          set_zero=True)
    if (batch_set_zero_val is not None) and (batch_set_zero_test == True):
        iterator = ModifiedIterator(
            iterator,
            batch_modifier,
        )
        batch_modifier = None
    exp = Experiment(model,
                     train_set,
                     valid_set,
                     test_set,
                     iterator,
                     loss_function,
                     optimizer,
                     model_constraint,
                     monitors,
                     stop_criterion,
                     remember_best_column='valid_misclass',
                     run_after_early_stop=True,
                     batch_modifier=batch_modifier,
                     cuda=cuda)
    if not only_return_exp:
        exp.run()
    else:
        exp.dataset = None
        exp.splitter = None

    return exp