def create_alarm_tmap(tm_name: str, alarm_name: str) -> Optional[TensorMap]:
    tm = None
    if tm_name == f"{alarm_name}_init_date":
        tm = TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.EVENT,
            tensor_from_file=make_alarm_array_tensor_from_file(),
            path_prefix=f"bedmaster/*/alarms/{alarm_name}/start_date",
        )
    elif tm_name == f"{alarm_name}_duration":
        tm = TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=make_alarm_array_tensor_from_file(),
            path_prefix=f"bedmaster/*/alarms/{alarm_name}/duration",
        )
    elif tm_name == f"{alarm_name}_level":
        tm = TensorMap(
            name=tm_name,
            shape=(None,),  # type: ignore
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=make_alarm_attribute_tensor_from_file("level"),
            path_prefix=f"bedmaster/*/alarms/{alarm_name}",
        )
    return tm

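# Usage sketch (hedged): the tmap name encodes the alarm plus the field to
# extract; "v_tach" below is a hypothetical alarm name used only for
# illustration.
#
# >>> tm = create_alarm_tmap("v_tach_duration", "v_tach")
# >>> tm.path_prefix
# 'bedmaster/*/alarms/v_tach/duration'
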
def missing_imputation(
    tm: TensorMap,
    hd5: h5py.File,
    visit: str,
    indices: List[int],
    period: str,
    tensor: np.ndarray,
    imputation_type: Optional[str] = None,
    **kwargs,
):
    if imputation_type == "sample_and_hold":
        if len(tensor) == 0 or np.isnan(tensor).all():
            # Hold the nearest non-NaN value from outside the window:
            # the last value before it ("pre") or the first value after it.
            if period == "pre":
                values = tm.tensor_from_file(tm, hd5, visits=visit, **kwargs)[0][
                    : indices[-1]
                ]
                index = -1
            else:
                values = tm.tensor_from_file(tm, hd5, visits=visit, **kwargs)[0][
                    indices[0] :
                ]
                index = 0
            imputation = values[~np.isnan(values)]
            if imputation.size == 0:
                imputation = np.array([np.nan])
            tensor = np.array([imputation[index]])
    elif imputation_type:
        # Statistic-based imputation: fill NaNs with the precomputed value
        # (e.g. a stored mean) from ICU_TMAPS_METADATA.
        name = tm.name.replace(f"_{imputation_type}", "")
        imputation = ICU_TMAPS_METADATA[name][imputation_type]
        tensor = np.nan_to_num(tensor, nan=imputation)
        if len(tensor) == 0:
            tensor = np.array([imputation])
    return tensor

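# Behavior sketch (hedged): the tmap name "hr_mean" and the stored value of
# 80.0 below are assumptions for illustration only.
#
# >>> # given ICU_TMAPS_METADATA["hr"]["mean"] == 80.0 and tm.name == "hr_mean"
# >>> missing_imputation(tm, hd5, visit, indices, "pre",
# ...                    np.array([np.nan, 72.0]), imputation_type="mean")
# array([80., 72.])
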
def create_event_department_tmap(tm_name: str, signal_name: str, data_type: str):
    tm = None
    name = signal_name.replace("|_", "")
    # Identical channel map shared by both department tmaps.
    department_channel_map = {
        "mgh blake 8 card sicu": 0,
        "mgh ellison 8 cardsurg": 1,
        "mgh ellison 9 med\\ccu": 2,
        "mgh ellison 10 stp dwn": 3,
        "mgh ellison11 card\\int": 4,
        "other": 5,
    }
    if tm_name == f"{name}_departments":
        tm = TensorMap(
            name=tm_name,
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=make_event_department_tensor_from_file(),
            path_prefix=f"edw/*/{data_type}/{signal_name}/start_date",
            channel_map=department_channel_map,
        )
    elif tm_name == f"{name}_departments_with_bm":
        tm = TensorMap(
            name=tm_name,
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=make_event_department_tensor_from_file(True),
            path_prefix=f"edw/*/{data_type}/{signal_name}/start_date",
            channel_map=department_channel_map,
        )
    return tm

def create_ecg_feature_tmap(tm_name: str):
    tm = None
    match = None
    if not match:
        pattern = re.compile(r"(.*)_(i|ii|iii|v)$")
        match = pattern.findall(tm_name)
        if match:
            peak_name, lead = match[0]
            tm = TensorMap(
                name=tm_name,
                shape=(None, None),
                interpretation=Interpretation.EVENT,
                tensor_from_file=make_ecg_peak_tensor_from_file(lead),
                path_prefix=f"bedmaster/*/ecg_features/{lead}/ecg_{peak_name}",
            )
    if not match:
        pattern = re.compile(r"(.*)_(i|ii|iii|v)_(timeseries|value|time)$")
        match = pattern.findall(tm_name)
        if match:
            feature_name, lead, tm_type = match[0]
            # Assumes every feature name begins with one of the prefixes
            # below; otherwise ref_peak would be unbound in the "timeseries"
            # and "time" branches.
            if (
                feature_name.startswith("r")
                or feature_name.startswith("q")
                or feature_name.startswith("pr")
                or feature_name.startswith("s")
            ):
                ref_peak = "r_peak"
            elif feature_name.startswith("p"):
                ref_peak = "p_peak"
            elif feature_name.startswith("t"):
                ref_peak = "t_peak"
            if tm_type == "timeseries":
                tm = TensorMap(
                    name=tm_name,
                    shape=(None, None),
                    interpretation=Interpretation.TIMESERIES,
                    tensor_from_file=make_ecg_feature_tensor_from_file(
                        f"{lead}_{ref_peak}",
                    ),
                    path_prefix=f"bedmaster/*/ecg_features/{lead}/{feature_name}",
                )
            elif tm_type == "value":
                tm = TensorMap(
                    name=tm_name,
                    shape=(None, None),
                    interpretation=Interpretation.CONTINUOUS,
                    tensor_from_file=make_ecg_feature_tensor_from_file(),
                    path_prefix=f"bedmaster/*/ecg_features/{lead}/{feature_name}",
                )
            elif tm_type == "time":
                tm = TensorMap(
                    name=tm_name,
                    shape=(None, None),
                    interpretation=Interpretation.EVENT,
                    tensor_from_file=make_ecg_peak_tensor_from_file(lead),
                    path_prefix=f"bedmaster/*/ecg_features/{lead}/ecg_{ref_peak}",
                )
    return tm

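# Name-parsing sketch (hedged): "qrs_duration" below is a hypothetical
# feature name. A name matching the second pattern, e.g.
# "qrs_duration_ii_value", parses to feature_name="qrs_duration", lead="ii",
# tm_type="value" and yields a CONTINUOUS tmap over
# bedmaster/*/ecg_features/ii/qrs_duration.
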
def create_around_explore_tmap(tmap_name: str) -> Optional[TensorMap]:
    pattern = re.compile(
        r"^(.*)_(\d+)_hrs_(pre|post)_(.*)_(\d+)_hrs_window_explore$",
    )
    match = pattern.findall(tmap_name)
    if match:
        make_tensor_from_file = make_around_event_explore_tensor_from_file(
            tmap_name.replace("_explore", ""),
        )
        channel_map = {
            "min": 0,
            "max": 1,
            "mean": 2,
            "std": 3,
            "first": 4,
            "last": 5,
            "count": 6,
        }
        path_prefix = create_around_tmap(
            tmap_name.replace("_explore", ""),
        ).path_prefix
        return TensorMap(
            name=tmap_name,
            tensor_from_file=make_tensor_from_file,
            channel_map=channel_map,
            path_prefix=path_prefix,
            interpretation=Interpretation.CONTINUOUS,
        )
    return None

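# Example name (hedged): a tmap like
# "hr_3_hrs_pre_arrest_start_date_6_hrs_window_explore" would match the
# pattern ("hr" and "arrest_start_date" are illustrative); the resulting
# tmap reports the seven summary statistics in channel_map over that window.
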
def admin_age_tensor_from_file(
    tm: TensorMap,
    data: PatientData,
    **kwargs,
) -> np.ndarray:
    if "visits" in kwargs:
        visits = kwargs["visits"]
        if isinstance(visits, str):
            visits = [visits]
    else:
        visits = tm.time_series_filter(data)
    shape = (len(visits),) + tm.shape
    tensor = np.zeros(shape)
    for i, visit in enumerate(visits):
        try:
            path = f"{tm.path_prefix}/{visit}"
            admit_date = get_unix_timestamps(data[path].attrs["admin_date"])
            birth_date = get_unix_timestamps(data[path].attrs["birth_date"])
            age = admit_date - birth_date
            # Convert the age from seconds to years (365-day years).
            tensor[i] = age / 60 / 60 / 24 / 365
        except (ValueError, KeyError) as e:
            logging.debug(f"Could not get age from {data.id}/{visit}")
            logging.debug(e)
    return tensor

def tensor_from_file_aortic_stenosis_category(
    tm: TensorMap,
    data: PatientData,
) -> np.ndarray:
    """Categorize aortic stenosis as mild, moderate, or severe
    from the aortic valve mean gradient."""
    echo_dates = tm.time_series_filter(data)
    av_mean_gradient_key = continuous_tmap_names_and_keys["av_mean_gradient"]
    av_mean_gradients = data[ECHO_PREFIX].loc[
        echo_dates.index,
        av_mean_gradient_key,
    ]

    # Initialize tensor of zeros where each row is the channel map
    num_categories = len(tm.channel_map)
    tensor = np.zeros((len(av_mean_gradients), num_categories))

    # Iterate through the mean gradients from all echos
    for idx, av_mean_gradient in enumerate(av_mean_gradients):
        if av_mean_gradient < 20:
            category = "mild"
        elif 20 <= av_mean_gradient < 40:
            category = "moderate"
        elif av_mean_gradient >= 40:
            category = "severe"
        else:
            # NaN gradients fail every comparison and leave the row all-zero
            continue
        tensor[idx, tm.channel_map[category]] = 1
    return tensor

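# Worked example (hedged): assuming channel_map {"mild": 0, "moderate": 1,
# "severe": 2}, mean gradients of [10, 25, 50, nan] mmHg produce the rows
# [1, 0, 0], [0, 1, 0], [0, 0, 1], and [0, 0, 0] respectively.
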
def tensor_from_file(tm: TensorMap, data: PatientData) -> np.ndarray:
    # `key` is captured from the enclosing scope (this is a closure).
    ici_dates = tm.time_series_filter(data)
    values = data[ICI_PREFIX].loc[ici_dates.index, key].to_numpy()
    tensor = np.zeros((len(values), len(tm.channel_map)))
    for i, value in enumerate(values):
        tensor[i, tm.channel_map[value]] = 1
    return tensor

def create_event_tmap(tm_name: str, event_name: str, event_type: str):
    tm = None
    name = event_name.replace("|_", "")
    if tm_name == f"{name}_start_date":
        tm = TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.EVENT,
            tensor_from_file=make_event_tensor_from_file(),
            path_prefix=f"edw/*/{event_type}/{event_name}/start_date",
        )
    elif tm_name == f"{name}_end_date":
        tm = TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.EVENT,
            tensor_from_file=make_event_tensor_from_file(),
            path_prefix=f"edw/*/{event_type}/{event_name}/end_date",
        )
    elif tm_name == f"{name}_double":
        tm = TensorMap(
            name=tm_name,
            shape=(2,),
            interpretation=Interpretation.CATEGORICAL,
            tensor_from_file=make_event_outcome_tensor_from_file(
                visit_tm=get_visit_tmap(f"{name}_first_visit"),
                double=True,
            ),
            channel_map={f"no_{name}": 0, name: 1},
            path_prefix=f"edw/*/{event_type}/{event_name}/start_date",
            time_series_limit=2,
        )
    elif tm_name == f"{name}_single":
        tm = TensorMap(
            name=tm_name,
            shape=(1,),
            interpretation=Interpretation.CATEGORICAL,
            tensor_from_file=make_event_outcome_tensor_from_file(
                visit_tm=get_visit_tmap(f"{name}_first_visit"),
                double=False,
            ),
            channel_map={f"no_{name}": 0, name: 1},
            path_prefix=f"edw/*/{event_type}/{event_name}/start_date",
            time_series_limit=2,
        )
    return tm

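# Usage sketch (hedged): "code_start" is an illustrative event name.
# create_event_tmap("code_start_start_date", "code_start", "events") returns
# an EVENT tmap over edw/*/events/code_start/start_date; the "_double" and
# "_single" variants are CATEGORICAL outcome tmaps keyed on whether the
# event occurred.
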
def create_med_tmap(tm_name: str, med_name: str):
    tm = None
    if tm_name == f"{med_name}_timeseries":
        tm = TensorMap(
            name=tm_name,
            shape=(None, 2, None),  # type: ignore
            interpretation=Interpretation.TIMESERIES,
            tensor_from_file=make_med_array_tensor_from_file(),
            path_prefix=f"edw/*/med/{med_name}",
        )
    elif tm_name == f"{med_name}_dose":
        tm = TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=make_med_array_tensor_from_file(),
            path_prefix=f"edw/*/med/{med_name}/dose",
        )
    elif tm_name == f"{med_name}_time":
        tm = TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.EVENT,
            tensor_from_file=make_med_array_tensor_from_file(),
            path_prefix=f"edw/*/med/{med_name}/start_date",
        )
    elif tm_name == f"{med_name}_units":
        tm = TensorMap(
            name=tm_name,
            shape=(None,),  # type: ignore
            interpretation=Interpretation.LANGUAGE,
            tensor_from_file=make_med_attribute_tensor_from_file("units"),
            path_prefix=f"edw/*/med/{med_name}",
        )
    elif tm_name == f"{med_name}_route":
        tm = TensorMap(
            name=tm_name,
            shape=(None,),  # type: ignore
            interpretation=Interpretation.LANGUAGE,
            tensor_from_file=make_med_attribute_tensor_from_file("route"),
            path_prefix=f"edw/*/med/{med_name}",
        )
    return tm

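# Usage sketch (hedged): "norepinephrine" is an illustrative med name.
#
# >>> tm = create_med_tmap("norepinephrine_dose", "norepinephrine")
# >>> tm.path_prefix
# 'edw/*/med/norepinephrine/dose'
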
def tensor_from_file(tm: TensorMap, data: PatientData) -> np.ndarray:
    # `key` is captured from the enclosing scope (this is a closure).
    surgery_dates = tm.time_series_filter(data)
    tensor = data[STS_PREFIX].loc[surgery_dates.index, key].to_numpy()
    if tm.channel_map is None:
        raise ValueError(f"{tm.name} channel map is None")
    tensor = np.array([one_hot(tm.channel_map, x) for x in tensor])
    if not is_dynamic_shape(tm):
        tensor = tensor[0]
    return tensor

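# `one_hot` is defined elsewhere in the module; a minimal sketch consistent
# with how it is called here (hypothetical, not the actual implementation):
#
# def one_hot(channel_map: Dict[str, int], value) -> np.ndarray:
#     encoding = np.zeros(len(channel_map))
#     encoding[channel_map[value]] = 1
#     return encoding
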
def tensor_from_file(tm: TensorMap, data: PatientData) -> np.ndarray:
    # `key`, `negative_value`, and `positive_value` are captured from the
    # enclosing scope (this is a closure).
    surgery_dates = tm.time_series_filter(data)
    tensor = data[STS_PREFIX].loc[surgery_dates.index, key].to_numpy()
    tensor = np.array(
        [binarize(key, x, negative_value, positive_value) for x in tensor],
    )
    if not is_dynamic_shape(tm):
        tensor = tensor[0]
    return tensor

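# `binarize` is likewise defined elsewhere; a plausible sketch given its call
# sites here and in `tff_any` below (hypothetical two-channel encoding, not
# the repo's actual implementation):
#
# def binarize(key, value, negative_value=0, positive_value=1) -> np.ndarray:
#     return np.array(
#         [value == negative_value, value == positive_value], dtype=int,
#     )
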
def create_list_signals_tmap(sig_name: str, sig_type: str, root: str):
    tm = TensorMap(
        name=sig_name,
        shape=(None, None),
        interpretation=Interpretation.LANGUAGE,
        tensor_from_file=make_list_signal_tensor_from_file(sig_type),
        path_prefix=f"{root}/*/{sig_type}",
    )
    return tm

def tff_any(tm: TensorMap, data: PatientData) -> np.ndarray:
    surgery_dates = tm.time_series_filter(data)
    tensor = data[STS_PREFIX].loc[surgery_dates.index, sts_outcome_keys].to_numpy()
    tensor = tensor.any(axis=1).astype(int)
    tensor = np.array([binarize("any", x) for x in tensor])
    if not is_dynamic_shape(tm):
        tensor = tensor[0]
    return tensor

def sex_double_tensor_from_file(tm: TensorMap, data: PatientData) -> np.ndarray:
    visit = tm.time_series_filter(data)[0]
    shape = (2,) + tm.shape
    tensor = np.zeros(shape)
    path = f"{tm.path_prefix}/{visit}"
    value = data[path].attrs["sex"]
    # Repeat the same one-hot encoding across both rows (time_series_limit=2).
    tensor[:, tm.channel_map[value.lower()]] = np.array([1, 1])
    return tensor

def create_timeseries_tmap(key: str) -> TensorMap:
    tmap = TensorMap(
        name=key,
        shape=(None,),
        interpretation=Interpretation.TIMESERIES,
        tensor_from_file=make_static_tensor_from_file(key),
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
    )
    return tmap

def create_language_tmap(key: str) -> TensorMap:
    tmap = TensorMap(
        name=key,
        shape=(1,),
        interpretation=Interpretation.LANGUAGE,
        tensor_from_file=make_static_tensor_from_file(key),
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
    )
    return tmap

def get_sliding_windows(
    hd5,
    window: int,
    step: int,
    event_tm_1: TensorMap,
    event_tm_2: TensorMap,
    visit_tm: TensorMap,
    buffer_adm_time: int = 24,
    **kwargs,
):
    """
    Create a sliding window from the time associated with <event_tm_1> to
    <event_tm_2> with step size <step> and window length <window>, all in
    hours.
    """
    # Cache windows per file so repeated calls for the same patient are free.
    if not hasattr(get_sliding_windows, "windows_cache"):
        get_sliding_windows.windows_cache = {}
    if hd5.id in get_sliding_windows.windows_cache:
        return get_sliding_windows.windows_cache[hd5.id]

    visit = visit_tm.tensor_from_file(visit_tm, hd5, **kwargs)[0]
    event_time_1 = event_tm_1.tensor_from_file(
        event_tm_1,
        hd5,
        visits=visit,
        unix_dates=True,
        **kwargs,
    )[0][0]
    event_time_2 = event_tm_2.tensor_from_file(
        event_tm_2,
        hd5,
        visits=visit,
        **kwargs,
    )[0][0]

    windows = np.arange(
        event_time_1 + (buffer_adm_time + window) * 60 * 60,
        event_time_2,
        step * 60 * 60,
    )
    # Raise before caching so an impossible configuration is not silently
    # cached as an empty array and returned on later calls.
    if windows.size == 0:
        raise ValueError(
            "It is not possible to compute a sliding window with the given "
            "parameters.",
        )
    get_sliding_windows.windows_cache[hd5.id] = windows
    return windows

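# Worked arithmetic (hedged): with buffer_adm_time=24, window=8, and step=4,
# the first window end falls 32 hours after event_time_1, and subsequent
# ends follow every 4 hours up to (but excluding) event_time_2.
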
def create_arrest_tmap(tm_name: str):
    arrest_list = ["code_start", "rapid_response_start"]
    tm = None
    if tm_name == "arrest_start_date":
        tm = TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.EVENT,
            tensor_from_file=make_general_event_tensor_from_subevents(arrest_list),
            path_prefix="edw/*/events/{}/start_date",
        )
    elif tm_name == "arrest_double":
        tm = TensorMap(
            name=tm_name,
            shape=(2,),
            interpretation=Interpretation.CATEGORICAL,
            tensor_from_file=make_general_event_outcome_tensor_from_subevents(
                visit_tm=get_visit_tmap("arrest_first_visit"),
                events_names_list=arrest_list,
                double=True,
            ),
            channel_map={"no_arrest": 0, "arrest": 1},
            path_prefix="edw/*/events/{}/start_date",
            time_series_limit=2,
        )
    elif tm_name == "arrest_single":
        tm = TensorMap(
            name=tm_name,
            shape=(1,),
            interpretation=Interpretation.CATEGORICAL,
            tensor_from_file=make_general_event_outcome_tensor_from_subevents(
                visit_tm=get_visit_tmap("arrest_first_visit"),
                events_names_list=arrest_list,
                double=False,
            ),
            channel_map={"no_arrest": 0, "arrest": 1},
            path_prefix="edw/*/events/{}/start_date",
            time_series_limit=2,
        )
    return tm

def create_age_tmap():
    tmap = TensorMap(
        name="age",
        shape=(1,),
        interpretation=Interpretation.CONTINUOUS,
        tensor_from_file=admin_age_tensor_from_file,
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
        validators=validator_no_negative,
    )
    return tmap

def tensor_from_file(tm: TensorMap, data: PatientData) -> np.ndarray:
    # `low` and `high` are captured from the enclosing scope (this is a
    # closure); column 1 means the gradient falls in [low, high), column 0
    # means it does not.
    echo_dates = tm.time_series_filter(data)
    mean_gradient_key = continuous_tmap_names_and_keys["av_mean_gradient"]
    mean_gradients = data[ECHO_PREFIX].loc[echo_dates.index, mean_gradient_key]
    tensor = np.zeros((len(echo_dates), 2))
    for idx, mean_gradient in enumerate(mean_gradients):
        if np.isnan(mean_gradient):
            continue
        tensor[idx, 1 if low <= mean_gradient < high else 0] = 1
    return tensor

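# Behavior sketch (hedged): with low=20 and high=40 (illustrative bounds),
# a gradient of 25 sets column 1, a gradient of 50 sets column 0, and a NaN
# gradient leaves the row all-zero.
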
def create_categorical_tmap(key: str, channel_map: Dict[str, int]) -> TensorMap:
    tmap = TensorMap(
        name=key,
        interpretation=Interpretation.CATEGORICAL,
        tensor_from_file=make_static_tensor_from_file(key),
        channel_map=channel_map,
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
        validators=validator_not_all_zero,
    )
    return tmap

def create_continuous_tmap(key: str) -> TensorMap:
    tmap = TensorMap(
        name=key,
        shape=(1,),
        interpretation=Interpretation.CONTINUOUS,
        tensor_from_file=make_static_tensor_from_file(key),
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
        validators=validator_no_negative,
    )
    return tmap

def create_static_event_tmap(key: str) -> TensorMap:
    tmap = TensorMap(
        name=key,
        shape=(1,),
        interpretation=Interpretation.EVENT,
        tensor_from_file=make_static_tensor_from_file(key),
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
        validators=validator_no_empty,
    )
    return tmap

def create_sex_double_tmap():
    tmap = TensorMap(
        name="sex_double",
        interpretation=Interpretation.CATEGORICAL,
        tensor_from_file=sex_double_tensor_from_file,
        channel_map={"male": 0, "female": 1},
        path_prefix=EDW_PREFIX,
        time_series_limit=2,
        validators=validator_not_all_zero,
    )
    return tmap

def create_mrn_tmap():
    tmap = TensorMap(
        name="mrn",
        shape=(1,),
        interpretation=Interpretation.LANGUAGE,
        tensor_from_file=mrn_tensor_from_file,
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
        validators=validator_no_empty,
    )
    return tmap

def create_first_visit_tmap(tm_name: str, signal_name: str, data_type: str):
    tm = None
    name = signal_name.replace("|_", "")
    if tm_name == f"{name}_first_visit":
        tm = TensorMap(
            name=tm_name,
            shape=(1,),
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=make_first_visit_tensor_from_file(),
            path_prefix=f"edw/*/{data_type}/{signal_name}/start_date",
        )
    return tm

def create_arrest_first_visit_tmap(tm_name: str):
    events_names_list = ["code_start", "rapid_response_start"]
    tm = None
    if tm_name == "arrest_first_visit":
        tm = TensorMap(
            name=tm_name,
            shape=(1,),
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=make_general_first_visit_tensor_from_subset(
                events_names_list,
            ),
            path_prefix="edw/*/events/{}/start_date",
        )
    return tm

def _tensor_from_file(tm: TensorMap, data: PatientData, **kwargs) -> np.ndarray:
    # `key` is captured from the enclosing scope (this is a closure).
    unix_dates = kwargs.get("unix_dates")
    if "visits" in kwargs:
        visits = kwargs["visits"]
        if isinstance(visits, str):
            visits = [visits]
    else:
        visits = tm.time_series_filter(data)

    temp = None
    finalize = False
    if tm.is_timeseries:
        temp = [data[f"{tm.path_prefix}/{v}"].attrs[key] for v in visits]
        max_len = max(map(len, temp))
        shape = (len(visits), max_len)
    else:
        shape = (len(visits),) + tm.shape

    # Pick the tensor dtype based on the tmap interpretation.
    if tm.is_categorical or tm.is_continuous or (tm.is_event and unix_dates):
        tensor = np.zeros(shape)
    elif tm.is_language or (tm.is_event and not unix_dates):
        tensor = np.full(shape, "", object)
        finalize = True
    elif tm.is_timeseries and temp is not None:
        if isinstance(temp[0][0], np.number):
            tensor = np.zeros(shape)
        else:
            tensor = np.full(shape, "", object)
            finalize = True
    else:
        raise ValueError("Unknown interpretation for static ICU data")

    for i, visit in enumerate(visits):
        try:
            path = f"{tm.path_prefix}/{visit}"
            value = data[path].attrs[key] if temp is None else temp[i]
            if tm.channel_map:
                tensor[i, tm.channel_map[value.lower()]] = 1
            elif tm.is_event and unix_dates:
                tensor[i] = get_unix_timestamps(value)
            else:
                tensor[i] = value
        except (ValueError, KeyError) as e:
            logging.debug(f"Error getting {key} from {data.id}/{visit}")
            logging.debug(e)

    if finalize:
        tensor = np.array(tensor, dtype=str)
    return tensor

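# Dispatch summary of the branches above (for reference):
#   CATEGORICAL / CONTINUOUS / EVENT with unix_dates -> numeric zeros
#   LANGUAGE / EVENT without unix_dates              -> "" fill, cast to str
#   TIMESERIES -> numeric or string depending on the stored element type
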
def test_explore(
    default_arguments_explore: argparse.Namespace,
    tmpdir_factory,
    utils,
):
    temp_dir = tmpdir_factory.mktemp("explore_tensors")
    default_arguments_explore.tensors = str(temp_dir)
    tmaps = pytest.TMAPS_UP_TO_4D[:]
    tmaps.append(
        TensorMap(
            "scalar",
            shape=(1,),
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=pytest.TFF,
        ),
    )
    explore_expected = utils.build_hd5s(temp_dir, tmaps, n=pytest.N_TENSORS)
    default_arguments_explore.num_workers = 3
    default_arguments_explore.tensor_maps_in = tmaps
    default_arguments_explore.explore_export_fpath = True
    explore(default_arguments_explore)

    csv_path = os.path.join(
        default_arguments_explore.output_folder,
        "tensors_union.csv",
    )
    explore_result = pd.read_csv(csv_path)
    for _, row in explore_result.iterrows():
        for tm in tmaps:
            row_expected = explore_expected[(row["fpath"], tm)]
            if _tmap_requires_modification_for_explore(tm):
                actual = getattr(row, continuous_explore_header(tm) + "_mean")
                assert not np.isnan(actual)
                continue
            if tm.is_continuous:
                actual = getattr(row, continuous_explore_header(tm))
                assert actual == row_expected
                continue
            if tm.is_categorical:
                for channel, idx in tm.channel_map.items():
                    channel_val = getattr(
                        row,
                        categorical_explore_header(tm, channel),
                    )
                    assert channel_val == row_expected[idx]