Example 1
def test_merge_overlapping_hfos():
    onset = [1.5, 2.0]
    duration = [0.1, 1.0]
    ch_name = ['A1', 'A1']
    sfreq = 1000

    # create the annotations dataframe
    annot_df = create_annotations_df(onset, duration, ch_name)
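    # the 'sample' column is the onset in seconds converted to a sample
    # index using the sampling rate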
    annot_df['sample'] = annot_df['onset'] * sfreq

    # first, test that no merging occurs when the events do not overlap:
    # merge_overlapping_events should return the exact same
    # annotations dataframe
    new_annot_df = merge_overlapping_events(annot_df)
    pd.testing.assert_frame_equal(annot_df, new_annot_df)

    # now add a third event that overlaps the existing ones
    onset = [1.5, 2.0, 1.55]
    duration = [0.1, 1.0, 0.5]
    ch_name = ['A1', 'A1', 'A1']

    # create the annotations dataframe
    annot_df = create_annotations_df(onset, duration, ch_name)
    annot_df['sample'] = annot_df['onset'] * sfreq

    # next, test that merging occurs when the events overlap:
    # merge_overlapping_events should collapse the overlapping
    # events into a single annotation
    new_annot_df = merge_overlapping_events(annot_df)
    assert new_annot_df.shape == (1, 5)
    assert new_annot_df['onset'].values == [1.5]
    assert new_annot_df['duration'].values == [0.55]
    assert new_annot_df['channels'].values == ['A1']
Example 2
def test_create_annot_df():
    onset = [1.5, 2.0, 3]
    duration = [0.0, 0, 1.5]
    ch_name = ['A1', 'A2', 'A3']
    annotation_label = ['ripple', 'frandr', 'fast-ripple']

    # without using annotation label, everything is labeled HFO
    annot_df = create_annotations_df(onset, duration, ch_name)
    assert len(annot_df['label'].unique()) == 1
    assert annot_df['label'][0] == 'HFO'

    # with annotation_label passed, each event gets its provided label
    annot_df = create_annotations_df(onset, duration, ch_name,
                                     annotation_label)
    assert len(annot_df['label'].unique()) == 3
    assert annot_df['label'][0] == 'ripple'

    # check errors when lengths mismatch
    with pytest.raises(ValueError,
                       match='Length of "onset", "description", '
                       '"duration", need to be the same.'):
        _onset = onset + [2]
        create_annotations_df(_onset, duration, ch_name, annotation_label)
    with pytest.raises(ValueError,
                       match='Length of "annotation_label" need '
                       'to be the same as other arguments'):
        create_annotations_df(onset, duration, ch_name, annotation_label[0])

    # check typing mismatch, should be caught by pandas
    onset[0] = 'blah'
    with pytest.raises(ValueError, match='could not convert string to float:'):
        create_annotations_df(onset, duration, ch_name)
Example 3
def test_metrics_df(end_sec, rate):
    """Test metrics based on reference dataframe and detector ran dataframe."""
    onset = [1.5, 2.0, 3, 5.5]
    duration = [0.1, 1.0, 1.0, 0.1]
    ch_name = ['A1', 'A2', 'A3', 'A1']
    sfreq = 1000

    annot_df = create_annotations_df(onset, duration, ch_name)

    # error will occur without the sample column
    with pytest.raises(RuntimeError,
                       match='Annotations dataframe '
                       'columns must contain'):
        compute_chs_hfo_rates(annot_df=annot_df, rate=rate)

    # now add sample column
    annot_df['sample'] = annot_df['onset'] * sfreq
    chs_hfo_rates = compute_chs_hfo_rates(annot_df=annot_df,
                                          rate=rate,
                                          end_sec=end_sec)
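    # ch_name above gives 'A1' two events and 'A2'/'A3' one event each, so
    # each channel's rate should be (event count / end_sec), rescaled from
    # per-second to the requested unit via TIME_SCALE_TO_SECS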
    assert chs_hfo_rates['A2'] == chs_hfo_rates['A3']
    np.testing.assert_almost_equal(chs_hfo_rates['A2'],
                                   (1. / end_sec) / TIME_SCALE_TO_SECS[rate],
                                   decimal=6)
    np.testing.assert_almost_equal(chs_hfo_rates['A1'],
                                   (2. / end_sec) / TIME_SCALE_TO_SECS[rate],
                                   decimal=6)

    # error if specified channel names are not in the dataframe
    with pytest.raises(ValueError, match=''):
        compute_chs_hfo_rates(annot_df, rate=rate, ch_names=['A0', 'A1'])
Example 4
def test_match_detection_scoring_sklearn():
    # First create two event dataframes with expected columns. We will
    # consider df1 to be ground truth and df2 to be the prediction
    sfreq = 1000
    # create dummy reference annotations
    onset1 = [1.5, 12.6, 22.342, 59.9]
    offset1 = [6.7300, 14.870, 31.1, 81.2]
    duration1 = [offset - onset for onset, offset in zip(onset1, offset1)]
    ch_name = ['A1'] * len(onset1)
    annotation_label = ['hfo'] * len(onset1)
    annot_df1 = create_annotations_df(onset1, duration1, ch_name,
                                      annotation_label)
    annot_df1['sample'] = annot_df1['onset'] * sfreq

    # create dummy predicted HFO annotations
    onset2 = [2, 12.3, 60.1, 98.3]
    offset2 = [6.93, 15.12, 65.6, 101.45]
    duration2 = [offset - onset for onset, offset in zip(onset2, offset2)]
    ch_name = ['A1'] * len(onset2)
    annotation_label = ['hfo'] * len(onset2)
    annot_df2 = create_annotations_df(onset2, duration2, ch_name,
                                      annotation_label)
    annot_df2['sample'] = annot_df2['onset'] * sfreq

    # Now convert the annotation dataframes to "sklearn format" of a list
    # of lists. Will use y_true for annot_df1 and y_pred for annot_df2
    ch_names = ['A1']
    y_true = _make_ydf_sklearn(annot_df1, ch_names)
    y_pred = _make_ydf_sklearn(annot_df2, ch_names)

    # Perform the scoring tests on the sklearn-formatted labels

    acc = accuracy(y_true, y_pred)
    assert acc == 0.6

    tpr = true_positive_rate(y_true, y_pred)
    assert tpr == 0.75

    fnr = false_negative_rate(y_true, y_pred)
    assert fnr == 0.25

    fdr = false_discovery_rate(y_true, y_pred)
    assert fdr == 0.25

    prec = precision(y_true, y_pred)
    assert prec == 0.75
Example 5
def test_match_detection_scoring_df():
    # First create two event dataframes with expected columns. We will
    # consider df1 to be ground truth and df2 to be the prediction
    sfreq = 1000
    # create dummy reference annotations
    onset1 = [1.5, 12.6, 22.342, 59.9]
    offset1 = [6.7300, 14.870, 31.1, 81.2]
    duration1 = [offset - onset for onset, offset in zip(onset1, offset1)]
    ch_name = ['A1'] * len(onset1)
    annotation_label = ['hfo'] * len(onset1)
    annot_df1 = create_annotations_df(onset1, duration1, ch_name,
                                      annotation_label)
    annot_df1['sample'] = annot_df1['onset'] * sfreq

    # create dummy predicted HFO annotations
    onset2 = [2, 12.3, 60.1, 98.3]
    offset2 = [6.93, 15.12, 65.6, 101.45]
    duration2 = [offset - onset for onset, offset in zip(onset2, offset2)]
    ch_name = ['A1'] * len(onset2)
    annotation_label = ['hfo'] * len(onset2)
    annot_df2 = create_annotations_df(onset2, duration2, ch_name,
                                      annotation_label)
    annot_df2['sample'] = annot_df2['onset'] * sfreq

    # In the above example, we have 3 true positives, 1 false negative,
    # and 1 false positive. Therefore, we expect accuracy = 0.6, tpr = 0.75,
    # fnr = 0.25, fdr = 0.25, and precision = 0.75
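    # quick arithmetic check (assuming accuracy here is computed over
    # detected events only, i.e. without true negatives):
    #   accuracy  = TP / (TP + FP + FN) = 3 / 5 = 0.6
    #   tpr       = TP / (TP + FN)      = 3 / 4 = 0.75
    #   fnr       = FN / (TP + FN)      = 1 / 4 = 0.25
    #   fdr       = FP / (TP + FP)      = 1 / 4 = 0.25
    #   precision = TP / (TP + FP)      = 3 / 4 = 0.75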

    acc = accuracy(annot_df1, annot_df2)
    assert acc == 0.6

    tpr = true_positive_rate(annot_df1, annot_df2)
    assert tpr == 0.75

    fnr = false_negative_rate(annot_df1, annot_df2)
    assert fnr == 0.25

    fdr = false_discovery_rate(annot_df1, annot_df2)
    assert fdr == 0.25

    prec = precision(annot_df1, annot_df2)
    assert prec == 0.75
Example 6
def test_io_annot_df(test_bids_root):
    # create dummy annotations
    onset = [1.5, 2.0, 3]
    duration = [0.0, 0, 1.5]
    ch_name = ['A1', 'A2', 'A3']
    annotation_label = ['ripple', 'frandr', 'fast-ripple']
    annot_df = create_annotations_df(onset, duration, ch_name,
                                     annotation_label)

    annot_path = bids_path.copy().update(root=None,
                                         suffix='annotations',
                                         check=False)
    out_fname = Path(test_bids_root
                     ) / 'derivatives' / 'sub-01' / annot_path.basename  # noqa
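    # the annotations file is written under the dataset's derivatives/ tree;
    # 'intended_for' presumably records which raw recording the annotations
    # describe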

    # save to temporary directory
    write_annotations(annot_df,
                      fname=out_fname,
                      intended_for=bids_path,
                      root=test_bids_root)

    # read them back
    annot_df = read_annotations(fname=out_fname, root=test_bids_root)

    # if you fail to pass in root, it should be inferred correctly
    new_annot_df = read_annotations(fname=out_fname)
    pd.testing.assert_frame_equal(annot_df, new_annot_df)

    # if the derivatives folder is not a subdirectory of the BIDS dataset,
    # then an error will be raised if root is not passed in
    tempdir = _TempDir()
    out_fname = Path(
        tempdir) / 'derivatives' / 'sub-01' / annot_path.basename  # noqa
    # save to temporary directory
    write_annotations(annot_df,
                      fname=out_fname,
                      intended_for=bids_path,
                      root=test_bids_root)
    with pytest.raises(RuntimeError, match='No raw dataset found'):
        read_annotations(fname=out_fname)
Example 7
def test_hyperparameter_search_cv(scorer, create_testing_eeg_data):
    sfreq = 5000
    ch_names = ['0']

    parameters = {'threshold': [1, 2, 3], 'win_size': [50, 100, 250]}
    detector = LineLengthDetector()
    scorer = make_scorer(scorer)
    cv = DisabledCV()
    gs = GridSearchCV(detector,
                      param_grid=parameters,
                      scoring=scorer,
                      cv=cv,
                      refit=False,
                      verbose=True)
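    # DisabledCV presumably acts as a single no-split CV iterator, so each
    # hyperparameter combination in the grid is scored on the full data
    # rather than on cross-validation folds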

    # create dummy EEG data with "true" HFO samples
    data, hfo_samps = create_testing_eeg_data
    data_2d = data[np.newaxis, :]
    data_2d = np.vstack((data_2d, data_2d))
    onset_samp = np.array([samp[0] for samp in hfo_samps])
    offset_samp = np.array([samp[1] for samp in hfo_samps])
    onset = onset_samp / sfreq
    offset = offset_samp / sfreq
    duration = offset - onset
    ch_names = ['A0'] * len(onset)

    # create actual Raw input data
    info = mne.create_info(ch_names=['A0', 'A1'], sfreq=sfreq, ch_types='ecog')
    raw = mne.io.RawArray(data_2d, info=info)

    # create the annotations dataframe
    annot_df = create_annotations_df(onset, duration, ch_names)
    annot_df['sample'] = annot_df['onset'] * sfreq

    # make sklearn compatible
    raw_df, y = make_Xy_sklearn(raw, annot_df)
    # run the grid search
    gs.fit(raw_df, y, groups=None)
Example 8
def test_compare_detectors():
    """Test comparison metrics."""

    # Create two dummy RMSDetector objects.
    rms1 = RMSDetector()
    rms2 = RMSDetector()

    # Make sure you can't run compare when neither detector has been fit
    with pytest.raises(RuntimeError,
                       match='clf_1 must be fit'
                       ' to data before using compare'):
        compare_detectors(rms1, rms2, method="mutual-info")

    # Create two event dataframes with expected columns. We will
    # consider df1 to be predictions from rms1 and df2 to be predictions
    # from rms2
    sfreq = 1000
    # create dummy reference annotations
    onset1 = [8, 12.6, 59.9, 99.2, 150.4]
    offset1 = [9.7300, 14.870, 66.1, 101.22, 156.1]
    duration1 = [offset - onset for onset, offset in zip(onset1, offset1)]
    ch_name = ['A1'] * len(onset1)
    annotation_label = ['hfo'] * len(onset1)
    annot_df1 = create_annotations_df(onset1, duration1, ch_name,
                                      annotation_label)
    annot_df1['sample'] = annot_df1['onset'] * sfreq

    # create dummy predicted HFO annotations
    onset2 = [2, 60.1, 98.3, 110.23]
    offset2 = [6.93, 65.6, 101.45, 112.89]
    duration2 = [offset - onset for onset, offset in zip(onset2, offset2)]
    ch_name = ['A1'] * len(onset2)
    annotation_label = ['hfo'] * len(onset2)
    annot_df2 = create_annotations_df(onset2, duration2, ch_name,
                                      annotation_label)
    annot_df2['sample'] = annot_df2['onset'] * sfreq

    # Attach the first annotation dataframe to the first dummy detector
    rms1.df_ = annot_df1

    # Make sure you can't run compare when the second detector isn't fit
    with pytest.raises(RuntimeError,
                       match='clf_2 must be fit'
                       ' to data before using compare'):
        compare_detectors(rms1, rms2, method="mutual-info")

    rms2.df_ = annot_df2

    # We expect the labels from rms1 to be [False, True, True, True,
    # True, False, True]
    # and the labels from rms2 to be [True, False, False, True, True,
    # True, False]
    # which gives the following mutual info and kappa scores

    expected_mutual_info = 0.20218548540814557
    expected_kappa_score = -0.5217391304347827
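
    # sanity-check sketch: the expected values above can be reproduced with
    # sklearn's scorers applied to the label vectors from the comment
    # (_labels1/_labels2 are local copies used only for this check)
    from sklearn.metrics import cohen_kappa_score, mutual_info_score
    _labels1 = [False, True, True, True, True, False, True]
    _labels2 = [True, False, False, True, True, True, False]
    assert_almost_equal(mutual_info_score(_labels1, _labels2),
                        expected_mutual_info, decimal=5)
    assert_almost_equal(cohen_kappa_score(_labels1, _labels2),
                        expected_kappa_score, decimal=5)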

    # Calculate mutual info and assert almost equal
    mutual_info = compare_detectors(rms1, rms2, method="mutual-info")
    mi = mutual_info['A1']
    assert_almost_equal(mi, expected_mutual_info, decimal=5)

    # Calculate kappa score and assert almost equal
    kappa = compare_detectors(rms1, rms2, method="cohen-kappa")
    k = kappa['A1']
    assert_almost_equal(k, expected_kappa_score, decimal=5)

    # Make sure you can't run a random method
    with pytest.raises(NotImplementedError):
        compare_detectors(rms1, rms2, method="average")
Example 9
def test_match_hfo_annotations():
    """Test matching HFO detections encoded in annotations DataFrame."""
    sfreq = 1000
    # create dummy reference annotations
    onset1 = [1.5, 12.6, 22.342, 59.9]
    offset1 = [6.7300, 14.870, 31.1, 81.2]
    duration1 = [offset - onset for onset, offset in zip(onset1, offset1)]
    ch_name = ['A1'] * len(onset1)
    annotation_label = ['hfo'] * len(onset1)
    annot_df1 = create_annotations_df(onset1, duration1, ch_name,
                                      annotation_label)
    annot_df1['sample'] = annot_df1['onset'] * sfreq

    # create dummy predicted HFO annotations
    onset2 = [2, 12.3, 60.1, 98.3]
    offset2 = [6.93, 15.12, 65.6, 101.45]
    duration2 = [offset - onset for onset, offset in zip(onset2, offset2)]
    ch_name = ['A1'] * len(onset2)
    annotation_label = ['hfo'] * len(onset2)
    annot_df2 = create_annotations_df(onset2, duration2, ch_name,
                                      annotation_label)
    annot_df2['sample'] = annot_df2['onset'] * sfreq

    # We first want to see what true labels are correctly predicted
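    # (for reference: true events 0 and 1 overlap predicted events 0 and 1,
    #  true event 2 [22.342-31.1 s] overlaps no prediction, true event 3
    #  [59.9-81.2 s] overlaps predicted event 2 [60.1-65.6 s], and predicted
    #  event 3 [98.3-101.45 s] overlaps no true event)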
    expected_dict_true = {
        "true_index": [0, 1, 2, 3],
        "pred_index": [0, 1, None, 2]
    }
    expected_df_true = pd.DataFrame(expected_dict_true)
    expected_df_true = expected_df_true.apply(pd.to_numeric,
                                              errors="coerce",
                                              downcast="float")
    output_df_true = match_detected_annotations(annot_df1,
                                                annot_df2,
                                                method="match-true")
    pd.testing.assert_frame_equal(expected_df_true,
                                  output_df_true,
                                  check_dtype=False)

    # Now let's check which predicted labels correspond to true labels
    expected_dict_pred = {
        "pred_index": [0, 1, 2, 3],
        "true_index": [0, 1, 3, None]
    }
    expected_df_pred = pd.DataFrame(expected_dict_pred)
    expected_df_pred = expected_df_pred.apply(pd.to_numeric,
                                              errors="coerce",
                                              downcast="float")
    output_df_pred = match_detected_annotations(annot_df1,
                                                annot_df2,
                                                method="match-pred")
    pd.testing.assert_frame_equal(expected_df_pred,
                                  output_df_pred,
                                  check_dtype=False)

    # Now we can check the combined output, which makes it easier
    # to compute other statistics
    expected_dict_total = {
        "true_index": [0, 1, 2, 3, None],
        "pred_index": [0, 1, None, 2, 3]
    }
    expected_df_total = pd.DataFrame(expected_dict_total)
    expected_df_total = expected_df_total.apply(pd.to_numeric,
                                                errors="coerce",
                                                downcast="float")
    output_df_total = match_detected_annotations(annot_df1,
                                                 annot_df2,
                                                 method="match-total")
    pd.testing.assert_frame_equal(expected_df_total,
                                  output_df_total,
                                  check_dtype=False)

    # Error should be thrown for any other passed methods
    with pytest.raises(NotImplementedError, match=''):
        match_detected_annotations(annot_df1,
                                   annot_df2,
                                   method="match-average")
Example 10
def test_match_detections_empty():
    # First create two annotation dataframes with expected columns. We will
    # consider df1 to be ground truth and df2 to be the prediction
    sfreq = 1000
    # create dummy reference annotations
    onset1 = [1.5, 12.6, 22.342, 59.9]
    offset1 = [6.7300, 14.870, 31.1, 81.2]
    duration1 = [offset - onset for onset, offset in zip(onset1, offset1)]
    ch_name = ['A1'] * len(onset1)
    annotation_label = ['hfo'] * len(onset1)
    annot_df1 = create_annotations_df(onset1, duration1, ch_name,
                                      annotation_label)
    annot_df1['sample'] = annot_df1['onset'] * sfreq

    # create dummy (empty) predicted annotations
    onset2 = []
    offset2 = []
    duration2 = [offset - onset for onset, offset in zip(onset2, offset2)]
    ch_name = ['A1'] * len(onset2)
    annotation_label = ['hfo'] * len(onset2)
    annot_df2 = create_annotations_df(onset2, duration2, ch_name,
                                      annotation_label)
    annot_df2['sample'] = annot_df2['onset'] * sfreq
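
    # with no predicted events, every reference event should be left
    # unmatched (a pred_index of None)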

    expected_dict_true = {
        "true_index": [0, 1, 2, 3],
        "pred_index": [None, None, None, None]
    }
    expected_df_true = pd.DataFrame(expected_dict_true)
    expected_df_true = expected_df_true.apply(pd.to_numeric,
                                              errors="coerce",
                                              downcast="float")
    output_df_true = match_detected_annotations(annot_df1,
                                                annot_df2,
                                                method="match-true")
    pd.testing.assert_frame_equal(expected_df_true,
                                  output_df_true,
                                  check_dtype=False)

    # Now let's check which predicted labels correspond to true labels.
    # With no predictions, the result should be empty
    output_df_pred = match_detected_annotations(annot_df1,
                                                annot_df2,
                                                method="match-pred")
    assert output_df_pred.empty

    # Now we can check the combined output, which makes it easier
    # to compute other statistics
    expected_dict_total = {
        "true_index": [0, 1, 2, 3],
        "pred_index": [None, None, None, None]
    }
    expected_df_total = pd.DataFrame(expected_dict_total)
    expected_df_total = expected_df_total.apply(pd.to_numeric,
                                                errors="coerce",
                                                downcast="float")
    output_df_total = match_detected_annotations(annot_df1,
                                                 annot_df2,
                                                 method="match-total")
    pd.testing.assert_frame_equal(expected_df_total,
                                  output_df_total,
                                  check_dtype=False)