def test_merge_overlapping_hfos():
    onset = [1.5, 2.0]
    duration = [0.1, 1.0]
    ch_name = ['A1', 'A1']
    sfreq = 1000

    # create the annotations dataframe
    annot_df = create_annotations_df(onset, duration, ch_name)
    annot_df['sample'] = annot_df['onset'] * sfreq

    # first, test that no merging occurs when it shouldn't:
    # merging these non-overlapping HFOs should return an
    # identical annotations dataframe
    new_annot_df = merge_overlapping_events(annot_df)
    pd.testing.assert_frame_equal(annot_df, new_annot_df)

    # now add a third event that overlaps the others
    onset = [1.5, 2.0, 1.55]
    duration = [0.1, 1.0, 0.5]
    ch_name = ['A1', 'A1', 'A1']

    # create the annotations dataframe
    annot_df = create_annotations_df(onset, duration, ch_name)
    annot_df['sample'] = annot_df['onset'] * sfreq

    # next, test that merging occurs when events overlap:
    # the overlapping HFOs should be collapsed into a single annotation
    new_annot_df = merge_overlapping_events(annot_df)
    assert new_annot_df.shape == (1, 5)
    assert new_annot_df['onset'].values == [1.5]
    assert new_annot_df['duration'].values == [0.55]
    assert new_annot_df['channels'].values == ['A1']

def test_create_annot_df():
    onset = [1.5, 2.0, 3]
    duration = [0.0, 0, 1.5]
    ch_name = ['A1', 'A2', 'A3']
    annotation_label = ['ripple', 'frandr', 'fast-ripple']

    # without using annotation_label, everything is labeled 'HFO'
    annot_df = create_annotations_df(onset, duration, ch_name)
    assert len(annot_df['label'].unique()) == 1
    assert annot_df['label'][0] == 'HFO'

    # using annotation_label, every event gets its passed-in label
    annot_df = create_annotations_df(onset, duration, ch_name,
                                     annotation_label)
    assert len(annot_df['label'].unique()) == 3
    assert annot_df['label'][0] == 'ripple'

    # check errors when lengths mismatch
    with pytest.raises(ValueError, match='Length of "onset", "description", '
                                         '"duration", need to be the same.'):
        _onset = onset + [2]
        create_annotations_df(_onset, duration, ch_name, annotation_label)
    with pytest.raises(ValueError, match='Length of "annotation_label" need '
                                         'to be the same as other arguments'):
        create_annotations_df(onset, duration, ch_name, annotation_label[0])

    # check typing mismatch; should be caught by pandas
    onset[0] = 'blah'
    with pytest.raises(ValueError,
                       match='could not convert string to float:'):
        create_annotations_df(onset, duration, ch_name)

def test_metrics_df(end_sec, rate):
    """Test metrics based on a reference dataframe and a detector-run dataframe.

    ``end_sec`` and ``rate`` are assumed to be supplied via a
    ``pytest.mark.parametrize`` decorator that is not shown in this excerpt.
    """
    onset = [1.5, 2.0, 3, 5.5]
    duration = [0.1, 1.0, 1.0, 0.1]
    ch_name = ['A1', 'A2', 'A3', 'A1']
    sfreq = 1000
    annot_df = create_annotations_df(onset, duration, ch_name)

    # an error will occur without the sample column
    with pytest.raises(RuntimeError, match='Annotations dataframe '
                                           'columns must contain'):
        compute_chs_hfo_rates(annot_df=annot_df, rate=rate)

    # now add the sample column
    annot_df['sample'] = annot_df['onset'] * sfreq
    chs_hfo_rates = compute_chs_hfo_rates(annot_df=annot_df, rate=rate,
                                          end_sec=end_sec)
    assert chs_hfo_rates['A2'] == chs_hfo_rates['A3']
    np.testing.assert_almost_equal(chs_hfo_rates['A2'],
                                   (1. / end_sec) / TIME_SCALE_TO_SECS[rate],
                                   decimal=6)
    np.testing.assert_almost_equal(chs_hfo_rates['A1'],
                                   (2. / end_sec) / TIME_SCALE_TO_SECS[rate],
                                   decimal=6)

    # error if the specified channel names are not inside the dataframe
    with pytest.raises(ValueError, match=''):
        compute_chs_hfo_rates(annot_df, rate=rate, ch_names=['A0', 'A1'])

def test_match_detection_scoring_sklearn():
    # First create two event dataframes with the expected columns. We will
    # consider df1 to be the ground truth and df2 to be the prediction.
    sfreq = 1000

    # create dummy reference annotations
    onset1 = [1.5, 12.6, 22.342, 59.9]
    offset1 = [6.7300, 14.870, 31.1, 81.2]
    duration1 = [offset - onset for onset, offset in zip(onset1, offset1)]
    ch_name = ['A1'] * len(onset1)
    annotation_label = ['hfo'] * len(onset1)
    annot_df1 = create_annotations_df(onset1, duration1, ch_name,
                                      annotation_label)
    annot_df1['sample'] = annot_df1['onset'] * sfreq

    # create dummy predicted HFO annotations
    onset2 = [2, 12.3, 60.1, 98.3]
    offset2 = [6.93, 15.12, 65.6, 101.45]
    duration2 = [offset - onset for onset, offset in zip(onset2, offset2)]
    ch_name = ['A1'] * len(onset2)
    annotation_label = ['hfo'] * len(onset2)
    annot_df2 = create_annotations_df(onset2, duration2, ch_name,
                                      annotation_label)
    annot_df2['sample'] = annot_df2['onset'] * sfreq

    # Now convert the annotation dataframes to the "sklearn format" of a
    # list of lists. We use y_true for annot_df1 and y_pred for annot_df2.
    ch_names = ['A1']
    y_true = _make_ydf_sklearn(annot_df1, ch_names)
    y_pred = _make_ydf_sklearn(annot_df2, ch_names)

    # Perform the same scoring tests on the sklearn-format inputs
    acc = accuracy(y_true, y_pred)
    assert acc == 0.6
    tpr = true_positive_rate(y_true, y_pred)
    assert tpr == 0.75
    fnr = false_negative_rate(y_true, y_pred)
    assert fnr == 0.25
    fdr = false_discovery_rate(y_true, y_pred)
    assert fdr == 0.25
    prec = precision(y_true, y_pred)
    assert prec == 0.75

def test_match_detection_scoring_df():
    # First create two event dataframes with the expected columns. We will
    # consider df1 to be the ground truth and df2 to be the prediction.
    sfreq = 1000

    # create dummy reference annotations
    onset1 = [1.5, 12.6, 22.342, 59.9]
    offset1 = [6.7300, 14.870, 31.1, 81.2]
    duration1 = [offset - onset for onset, offset in zip(onset1, offset1)]
    ch_name = ['A1'] * len(onset1)
    annotation_label = ['hfo'] * len(onset1)
    annot_df1 = create_annotations_df(onset1, duration1, ch_name,
                                      annotation_label)
    annot_df1['sample'] = annot_df1['onset'] * sfreq

    # create dummy predicted HFO annotations
    onset2 = [2, 12.3, 60.1, 98.3]
    offset2 = [6.93, 15.12, 65.6, 101.45]
    duration2 = [offset - onset for onset, offset in zip(onset2, offset2)]
    ch_name = ['A1'] * len(onset2)
    annotation_label = ['hfo'] * len(onset2)
    annot_df2 = create_annotations_df(onset2, duration2, ch_name,
                                      annotation_label)
    annot_df2['sample'] = annot_df2['onset'] * sfreq

    # In the above example, we have 3 true positives, 1 false negative,
    # and 1 false positive. Therefore, we expect accuracy = 0.6, tpr = 0.75,
    # fnr = 0.25, fdr = 0.25, and precision = 0.75 (the arithmetic is worked
    # out in the note after this test).
    acc = accuracy(annot_df1, annot_df2)
    assert acc == 0.6
    tpr = true_positive_rate(annot_df1, annot_df2)
    assert tpr == 0.75
    fnr = false_negative_rate(annot_df1, annot_df2)
    assert fnr == 0.25
    fdr = false_discovery_rate(annot_df1, annot_df2)
    assert fdr == 0.25
    prec = precision(annot_df1, annot_df2)
    assert prec == 0.75

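# A quick sketch of the arithmetic behind the expected values in
# ``test_match_detection_scoring_df`` above, assuming the scorers are
# defined over matched events (so there are no true negatives):
#   TP = 3, FP = 1, FN = 1
#   accuracy  = TP / (TP + FP + FN) = 3 / 5 = 0.6
#   tpr       = TP / (TP + FN)      = 3 / 4 = 0.75
#   fnr       = FN / (TP + FN)      = 1 / 4 = 0.25
#   fdr       = FP / (TP + FP)      = 1 / 4 = 0.25
#   precision = TP / (TP + FP)      = 3 / 4 = 0.75
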
def test_io_annot_df(test_bids_root):
    # create dummy annotations
    onset = [1.5, 2.0, 3]
    duration = [0.0, 0, 1.5]
    ch_name = ['A1', 'A2', 'A3']
    annotation_label = ['ripple', 'frandr', 'fast-ripple']
    annot_df = create_annotations_df(onset, duration, ch_name,
                                     annotation_label)

    # ``bids_path`` is assumed to be the module-level BIDSPath for the raw
    # recording (defined outside this excerpt)
    annot_path = bids_path.copy().update(root=None, suffix='annotations',
                                         check=False)
    out_fname = Path(test_bids_root) / 'derivatives' / 'sub-01' / annot_path.basename  # noqa

    # save to a temporary directory
    write_annotations(annot_df, fname=out_fname,
                      intended_for=bids_path, root=test_bids_root)

    # read them back
    annot_df = read_annotations(fname=out_fname, root=test_bids_root)

    # if root is not passed in, it should be inferred correctly
    new_annot_df = read_annotations(fname=out_fname)
    pd.testing.assert_frame_equal(annot_df, new_annot_df)

    # if derivatives is not in a subdirectory of the BIDS dataset,
    # then an error is raised when root is not passed in
    tempdir = _TempDir()
    out_fname = Path(tempdir) / 'derivatives' / 'sub-01' / annot_path.basename  # noqa

    # save to the temporary directory
    write_annotations(annot_df, fname=out_fname,
                      intended_for=bids_path, root=test_bids_root)
    with pytest.raises(RuntimeError, match='No raw dataset found'):
        read_annotations(fname=out_fname)

def test_hyperparameter_search_cv(scorer, create_testing_eeg_data):
    """Test hyperparameter search over detector parameters.

    ``scorer`` and ``create_testing_eeg_data`` are assumed to be supplied
    by pytest parametrization/fixtures defined elsewhere in the test suite.
    """
    sfreq = 5000
    ch_names = ['0']
    parameters = {'threshold': [1, 2, 3], 'win_size': [50, 100, 250]}
    detector = LineLengthDetector()
    scorer = make_scorer(scorer)
    cv = DisabledCV()  # disables cross-validation (see sketch after this test)
    gs = GridSearchCV(detector, param_grid=parameters, scoring=scorer,
                      cv=cv, refit=False, verbose=True)

    # create dummy EEG data with "true" HFO samples
    data, hfo_samps = create_testing_eeg_data
    data_2d = data[np.newaxis, :]
    data_2d = np.vstack((data_2d, data_2d))
    onset_samp = np.array([samp[0] for samp in hfo_samps])
    offset_samp = np.array([samp[1] for samp in hfo_samps])
    onset = onset_samp / sfreq
    offset = offset_samp / sfreq
    duration = offset - onset
    ch_names = ['A0'] * len(onset)

    # create actual Raw input data
    info = mne.create_info(ch_names=['A0', 'A1'], sfreq=sfreq,
                           ch_types='ecog')
    raw = mne.io.RawArray(data_2d, info=info)

    # create the annotations dataframe
    annot_df = create_annotations_df(onset, duration, ch_names)
    annot_df['sample'] = annot_df['onset'] * sfreq

    # make sklearn compatible
    raw_df, y = make_Xy_sklearn(raw, annot_df)

    # run GridSearch
    gs.fit(raw_df, y, groups=None)

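# ``DisabledCV`` above lets ``GridSearchCV`` score each parameter combination
# on the full recording instead of cross-validating. The class below is a
# minimal illustrative sketch of such a splitter (an assumption about its
# behavior, not the package's actual implementation):
class _SingleSplitCV:
    """Dummy CV splitter that yields all samples as both train and test."""

    def __init__(self):
        self.n_splits = 1

    def split(self, X, y=None, groups=None):
        # one "fold": train and evaluate on every sample
        indices = np.arange(len(X))
        yield indices, indices

    def get_n_splits(self, X=None, y=None, groups=None):
        return self.n_splits
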
def test_compare_detectors():
    """Test comparison metrics."""
    # Create two dummy RMSDetector objects.
    rms1 = RMSDetector()
    rms2 = RMSDetector()

    # Make sure you can't run compare when the detectors haven't been fit
    with pytest.raises(RuntimeError, match='clf_1 must be fit'
                                           ' to data before using compare'):
        compare_detectors(rms1, rms2, method="mutual-info")

    # Create two event dataframes with the expected columns. We will
    # consider df1 to be predictions from rms1 and df2 to be predictions
    # from rms2.
    sfreq = 1000

    # create dummy reference annotations
    onset1 = [8, 12.6, 59.9, 99.2, 150.4]
    offset1 = [9.7300, 14.870, 66.1, 101.22, 156.1]
    duration1 = [offset - onset for onset, offset in zip(onset1, offset1)]
    ch_name = ['A1'] * len(onset1)
    annotation_label = ['hfo'] * len(onset1)
    annot_df1 = create_annotations_df(onset1, duration1, ch_name,
                                      annotation_label)
    annot_df1['sample'] = annot_df1['onset'] * sfreq

    # create dummy predicted HFO annotations
    onset2 = [2, 60.1, 98.3, 110.23]
    offset2 = [6.93, 65.6, 101.45, 112.89]
    duration2 = [offset - onset for onset, offset in zip(onset2, offset2)]
    ch_name = ['A1'] * len(onset2)
    annotation_label = ['hfo'] * len(onset2)
    annot_df2 = create_annotations_df(onset2, duration2, ch_name,
                                      annotation_label)
    annot_df2['sample'] = annot_df2['onset'] * sfreq

    # Attach the first annotation dataframe to its dummy detector
    rms1.df_ = annot_df1

    # rms2 still hasn't been fit, so compare should again raise
    with pytest.raises(RuntimeError, match='clf_2 must be fit'
                                           ' to data before using compare'):
        compare_detectors(rms1, rms2, method="mutual-info")

    rms2.df_ = annot_df2

    # We expect the labels from rms1 to be
    #   [False, True, True, True, True, False, True]
    # and the labels from rms2 to be
    #   [True, False, False, True, True, True, False],
    # which gives the following mutual info and kappa scores
    # (reproduced with scikit-learn in the sanity-check sketch after this test)
    expected_mutual_info = 0.20218548540814557
    expected_kappa_score = -0.5217391304347827

    # Calculate mutual info and assert almost equal
    mutual_info = compare_detectors(rms1, rms2, method="mutual-info")
    mi = mutual_info['A1']
    assert_almost_equal(mi, expected_mutual_info, decimal=5)

    # Calculate kappa score and assert almost equal
    kappa = compare_detectors(rms1, rms2, method="cohen-kappa")
    k = kappa['A1']
    assert_almost_equal(k, expected_kappa_score, decimal=5)

    # Make sure an unsupported method raises
    with pytest.raises(NotImplementedError):
        compare_detectors(rms1, rms2, method="average")

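# The expected mutual-info and kappa values in ``test_compare_detectors``
# can be reproduced directly from the boolean label vectors with
# scikit-learn; the helper below is an illustrative sanity check, not part
# of the package API.
def _sanity_check_compare_values():
    from sklearn.metrics import cohen_kappa_score, mutual_info_score

    labels1 = [False, True, True, True, True, False, True]
    labels2 = [True, False, False, True, True, True, False]
    # mutual_info_score returns MI in nats; both values match the expected
    # constants used in the test above
    assert abs(mutual_info_score(labels1, labels2) - 0.2021854854) < 1e-6
    assert abs(cohen_kappa_score(labels1, labels2) - (-0.5217391304)) < 1e-6
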
def test_match_hfo_annotations():
    """Test matching HFO detections encoded in annotations DataFrames."""
    sfreq = 1000

    # create dummy reference annotations
    onset1 = [1.5, 12.6, 22.342, 59.9]
    offset1 = [6.7300, 14.870, 31.1, 81.2]
    duration1 = [offset - onset for onset, offset in zip(onset1, offset1)]
    ch_name = ['A1'] * len(onset1)
    annotation_label = ['hfo'] * len(onset1)
    annot_df1 = create_annotations_df(onset1, duration1, ch_name,
                                      annotation_label)
    annot_df1['sample'] = annot_df1['onset'] * sfreq

    # create dummy predicted HFO annotations
    onset2 = [2, 12.3, 60.1, 98.3]
    offset2 = [6.93, 15.12, 65.6, 101.45]
    duration2 = [offset - onset for onset, offset in zip(onset2, offset2)]
    ch_name = ['A1'] * len(onset2)
    annotation_label = ['hfo'] * len(onset2)
    annot_df2 = create_annotations_df(onset2, duration2, ch_name,
                                      annotation_label)
    annot_df2['sample'] = annot_df2['onset'] * sfreq

    # We first want to see which true labels are correctly predicted
    expected_dict_true = {
        "true_index": [0, 1, 2, 3],
        "pred_index": [0, 1, None, 2],
    }
    expected_df_true = pd.DataFrame(expected_dict_true)
    expected_df_true = expected_df_true.apply(pd.to_numeric, errors="coerce",
                                              downcast="float")
    output_df_true = match_detected_annotations(annot_df1, annot_df2,
                                                method="match-true")
    pd.testing.assert_frame_equal(expected_df_true, output_df_true,
                                  check_dtype=False)

    # Now let's check which predicted labels correspond to true labels
    expected_dict_pred = {
        "pred_index": [0, 1, 2, 3],
        "true_index": [0, 1, 3, None],
    }
    expected_df_pred = pd.DataFrame(expected_dict_pred)
    expected_df_pred = expected_df_pred.apply(pd.to_numeric, errors="coerce",
                                              downcast="float")
    output_df_pred = match_detected_annotations(annot_df1, annot_df2,
                                                method="match-pred")
    pd.testing.assert_frame_equal(expected_df_pred, output_df_pred,
                                  check_dtype=False)

    # Now we can check the total output, which makes it easier
    # to compute other stats
    expected_dict_total = {
        "true_index": [0, 1, 2, 3, None],
        "pred_index": [0, 1, None, 2, 3],
    }
    expected_df_total = pd.DataFrame(expected_dict_total)
    expected_df_total = expected_df_total.apply(pd.to_numeric,
                                                errors="coerce",
                                                downcast="float")
    output_df_total = match_detected_annotations(annot_df1, annot_df2,
                                                 method="match-total")
    pd.testing.assert_frame_equal(expected_df_total, output_df_total,
                                  check_dtype=False)

    # An error should be thrown for any other passed method
    with pytest.raises(NotImplementedError, match=''):
        match_detected_annotations(annot_df1, annot_df2,
                                   method="match-average")

def test_match_detections_empty():
    # First create two annotation dataframes with the expected columns. We
    # will consider df1 to be the ground truth and df2 to be the prediction.
    sfreq = 1000

    # create dummy reference annotations
    onset1 = [1.5, 12.6, 22.342, 59.9]
    offset1 = [6.7300, 14.870, 31.1, 81.2]
    duration1 = [offset - onset for onset, offset in zip(onset1, offset1)]
    ch_name = ['A1'] * len(onset1)
    annotation_label = ['hfo'] * len(onset1)
    annot_df1 = create_annotations_df(onset1, duration1, ch_name,
                                      annotation_label)
    annot_df1['sample'] = annot_df1['onset'] * sfreq

    # create empty predicted HFO annotations
    onset2 = []
    offset2 = []
    duration2 = [offset - onset for onset, offset in zip(onset2, offset2)]
    ch_name = ['A1'] * len(onset2)
    annotation_label = ['hfo'] * len(onset2)
    annot_df2 = create_annotations_df(onset2, duration2, ch_name,
                                      annotation_label)
    annot_df2['sample'] = annot_df2['onset'] * sfreq

    expected_dict_true = {
        "true_index": [0, 1, 2, 3],
        "pred_index": [None, None, None, None],
    }
    expected_df_true = pd.DataFrame(expected_dict_true)
    expected_df_true = expected_df_true.apply(pd.to_numeric, errors="coerce",
                                              downcast="float")
    output_df_true = match_detected_annotations(annot_df1, annot_df2,
                                                method="match-true")
    pd.testing.assert_frame_equal(expected_df_true, output_df_true,
                                  check_dtype=False)

    # Now let's check which predicted labels correspond to true labels.
    # The result should be empty.
    output_df_pred = match_detected_annotations(annot_df1, annot_df2,
                                                method="match-pred")
    assert output_df_pred.empty

    # Now we can check the total output, which makes it easier
    # to compute other stats
    expected_dict_total = {
        "true_index": [0, 1, 2, 3],
        "pred_index": [None, None, None, None],
    }
    expected_df_total = pd.DataFrame(expected_dict_total)
    expected_df_total = expected_df_total.apply(pd.to_numeric,
                                                errors="coerce",
                                                downcast="float")
    output_df_total = match_detected_annotations(annot_df1, annot_df2,
                                                 method="match-total")
    pd.testing.assert_frame_equal(expected_df_total, output_df_total,
                                  check_dtype=False)