def create_alarm_tmap(tm_name: str, alarm_name: str) -> Optional[TensorMap]:
    tm = None
    if tm_name == f"{alarm_name}_init_date":
        tm = TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.EVENT,
            tensor_from_file=make_alarm_array_tensor_from_file(),
            path_prefix=f"bedmaster/*/alarms/{alarm_name}/start_date",
        )
    elif tm_name == f"{alarm_name}_duration":
        tm = TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=make_alarm_array_tensor_from_file(),
            path_prefix=f"bedmaster/*/alarms/{alarm_name}/duration",
        )
    elif tm_name == f"{alarm_name}_level":
        tm = TensorMap(
            name=tm_name,
            shape=(None,),  # type: ignore
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=make_alarm_attribute_tensor_from_file("level"),
            path_prefix=f"bedmaster/*/alarms/{alarm_name}",
        )
    return tm

def create_event_department_tmap(tm_name: str, signal_name: str, data_type: str):
    tm = None
    name = signal_name.replace("|_", "")
    if tm_name == f"{name}_departments":
        tm = TensorMap(
            name=tm_name,
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=make_event_department_tensor_from_file(),
            path_prefix=f"edw/*/{data_type}/{signal_name}/start_date",
            channel_map={
                "mgh blake 8 card sicu": 0,
                "mgh ellison 8 cardsurg": 1,
                "mgh ellison 9 med\\ccu": 2,
                "mgh ellison 10 stp dwn": 3,
                "mgh ellison11 card\\int": 4,
                "other": 5,
            },
        )
    elif tm_name == f"{name}_departments_with_bm":
        tm = TensorMap(
            name=tm_name,
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=make_event_department_tensor_from_file(True),
            path_prefix=f"edw/*/{data_type}/{signal_name}/start_date",
            channel_map={
                "mgh blake 8 card sicu": 0,
                "mgh ellison 8 cardsurg": 1,
                "mgh ellison 9 med\\ccu": 2,
                "mgh ellison 10 stp dwn": 3,
                "mgh ellison11 card\\int": 4,
                "other": 5,
            },
        )
    return tm

def create_ecg_feature_tmap(tm_name: str):
    tm = None
    pattern = re.compile(r"(.*)_(i|ii|iii|v)$")
    match = pattern.findall(tm_name)
    if match:
        peak_name, lead = match[0]
        tm = TensorMap(
            name=tm_name,
            shape=(None, None),
            interpretation=Interpretation.EVENT,
            tensor_from_file=make_ecg_peak_tensor_from_file(lead),
            path_prefix=f"bedmaster/*/ecg_features/{lead}/ecg_{peak_name}",
        )
    if not match:
        pattern = re.compile(r"(.*)_(i|ii|iii|v)_(timeseries|value|time)$")
        match = pattern.findall(tm_name)
        if match:
            feature_name, lead, tm_type = match[0]
            # Map the feature prefix to the ECG peak used as its time reference.
            if feature_name.startswith(("r", "q", "pr", "s")):
                ref_peak = "r_peak"
            elif feature_name.startswith("p"):
                ref_peak = "p_peak"
            elif feature_name.startswith("t"):
                ref_peak = "t_peak"
            else:
                # Unknown feature prefix: no reference peak can be inferred
                # (previously this fell through and raised UnboundLocalError).
                return None
            if tm_type == "timeseries":
                tm = TensorMap(
                    name=tm_name,
                    shape=(None, None),
                    interpretation=Interpretation.TIMESERIES,
                    tensor_from_file=make_ecg_feature_tensor_from_file(
                        f"{lead}_{ref_peak}",
                    ),
                    path_prefix=f"bedmaster/*/ecg_features/{lead}/{feature_name}",
                )
            elif tm_type == "value":
                tm = TensorMap(
                    name=tm_name,
                    shape=(None, None),
                    interpretation=Interpretation.CONTINUOUS,
                    tensor_from_file=make_ecg_feature_tensor_from_file(),
                    path_prefix=f"bedmaster/*/ecg_features/{lead}/{feature_name}",
                )
            elif tm_type == "time":
                tm = TensorMap(
                    name=tm_name,
                    shape=(None, None),
                    interpretation=Interpretation.EVENT,
                    tensor_from_file=make_ecg_peak_tensor_from_file(lead),
                    path_prefix=f"bedmaster/*/ecg_features/{lead}/ecg_{ref_peak}",
                )
    return tm

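
# A minimal, hypothetical usage sketch (the peak/feature names below are
# illustrative, not from the source): the first pattern matches
# "<peak>_<lead>", the second "<feature>_<lead>_<timeseries|value|time>".
def _example_ecg_feature_tmaps():
    r_peaks = create_ecg_feature_tmap("r_peak_ii")  # EVENT: R-peak times, lead ii
    qt_values = create_ecg_feature_tmap("qt_interval_ii_value")  # CONTINUOUS values
    return r_peaks, qt_values
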
def create_around_explore_tmap(tmap_name: str) -> Optional[TensorMap]:
    pattern = re.compile(
        r"^(.*)_(\d+)_hrs_(pre|post)_(.*)_(\d+)_hrs_window_explore$",
    )
    match = pattern.findall(tmap_name)
    if match:
        make_tensor_from_file = make_around_event_explore_tensor_from_file(
            tmap_name.replace("_explore", ""),
        )
        channel_map = {
            "min": 0,
            "max": 1,
            "mean": 2,
            "std": 3,
            "first": 4,
            "last": 5,
            "count": 6,
        }
        path_prefix = create_around_tmap(
            tmap_name.replace("_explore", ""),
        ).path_prefix
        return TensorMap(
            name=tmap_name,
            tensor_from_file=make_tensor_from_file,
            channel_map=channel_map,
            path_prefix=path_prefix,
            interpretation=Interpretation.CONTINUOUS,
        )
    return None

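
# Hypothetical example (the signal/event names are assumptions): stripping
# "_explore" must leave a name that also parses in create_around_tmap, which
# supplies the path prefix for the summary-statistic channels above.
def _example_around_explore_tmap():
    return create_around_explore_tmap(
        "pulse_12_hrs_pre_arrest_start_date_6_hrs_window_explore",
    )
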
def create_event_tmap(tm_name: str, event_name: str, event_type: str):
    tm = None
    name = event_name.replace("|_", "")
    if tm_name == f"{name}_start_date":
        tm = TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.EVENT,
            tensor_from_file=make_event_tensor_from_file(),
            path_prefix=f"edw/*/{event_type}/{event_name}/start_date",
        )
    elif tm_name == f"{name}_end_date":
        tm = TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.EVENT,
            tensor_from_file=make_event_tensor_from_file(),
            path_prefix=f"edw/*/{event_type}/{event_name}/end_date",
        )
    elif tm_name == f"{name}_double":
        tm = TensorMap(
            name=tm_name,
            shape=(2,),
            interpretation=Interpretation.CATEGORICAL,
            tensor_from_file=make_event_outcome_tensor_from_file(
                visit_tm=get_visit_tmap(f"{name}_first_visit"),
                double=True,
            ),
            channel_map={f"no_{name}": 0, name: 1},
            path_prefix=f"edw/*/{event_type}/{event_name}/start_date",
            time_series_limit=2,
        )
    elif tm_name == f"{name}_single":
        tm = TensorMap(
            name=tm_name,
            shape=(1,),
            interpretation=Interpretation.CATEGORICAL,
            tensor_from_file=make_event_outcome_tensor_from_file(
                visit_tm=get_visit_tmap(f"{name}_first_visit"),
                double=False,
            ),
            channel_map={f"no_{name}": 0, name: 1},
            path_prefix=f"edw/*/{event_type}/{event_name}/start_date",
            time_series_limit=2,
        )
    return tm

def create_med_tmap(tm_name: str, med_name: str):
    tm = None
    if tm_name == f"{med_name}_timeseries":
        tm = TensorMap(
            name=tm_name,
            shape=(None, 2, None),  # type: ignore
            interpretation=Interpretation.TIMESERIES,
            tensor_from_file=make_med_array_tensor_from_file(),
            path_prefix=f"edw/*/med/{med_name}",
        )
    elif tm_name == f"{med_name}_dose":
        tm = TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=make_med_array_tensor_from_file(),
            path_prefix=f"edw/*/med/{med_name}/dose",
        )
    elif tm_name == f"{med_name}_time":
        tm = TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.EVENT,
            tensor_from_file=make_med_array_tensor_from_file(),
            path_prefix=f"edw/*/med/{med_name}/start_date",
        )
    elif tm_name == f"{med_name}_units":
        tm = TensorMap(
            name=tm_name,
            shape=(None,),  # type: ignore
            interpretation=Interpretation.LANGUAGE,
            tensor_from_file=make_med_attribute_tensor_from_file("units"),
            path_prefix=f"edw/*/med/{med_name}",
        )
    elif tm_name == f"{med_name}_route":
        tm = TensorMap(
            name=tm_name,
            shape=(None,),  # type: ignore
            interpretation=Interpretation.LANGUAGE,
            tensor_from_file=make_med_attribute_tensor_from_file("route"),
            path_prefix=f"edw/*/med/{med_name}",
        )
    return tm

def create_list_signals_tmap(sig_name: str, sig_type: str, root: str):
    tm = TensorMap(
        name=sig_name,
        shape=(None, None),
        interpretation=Interpretation.LANGUAGE,
        tensor_from_file=make_list_signal_tensor_from_file(sig_type),
        path_prefix=f"{root}/*/{sig_type}",
    )
    return tm

def create_timeseries_tmap(key: str) -> TensorMap:
    tmap = TensorMap(
        name=key,
        shape=(None,),
        interpretation=Interpretation.TIMESERIES,
        tensor_from_file=make_static_tensor_from_file(key),
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
    )
    return tmap

def create_language_tmap(key: str) -> TensorMap:
    tmap = TensorMap(
        name=key,
        shape=(1,),
        interpretation=Interpretation.LANGUAGE,
        tensor_from_file=make_static_tensor_from_file(key),
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
    )
    return tmap

def create_arrest_tmap(tm_name: str):
    arrest_list = ["code_start", "rapid_response_start"]
    tm = None
    if tm_name == "arrest_start_date":
        tm = TensorMap(
            name=tm_name,
            shape=(None, None),  # type: ignore
            interpretation=Interpretation.EVENT,
            tensor_from_file=make_general_event_tensor_from_subevents(arrest_list),
            path_prefix="edw/*/events/{}/start_date",
        )
    elif tm_name == "arrest_double":
        tm = TensorMap(
            name=tm_name,
            shape=(2,),
            interpretation=Interpretation.CATEGORICAL,
            tensor_from_file=make_general_event_outcome_tensor_from_subevents(
                visit_tm=get_visit_tmap("arrest_first_visit"),
                events_names_list=arrest_list,
                double=True,
            ),
            channel_map={"no_arrest": 0, "arrest": 1},
            path_prefix="edw/*/events/{}/start_date",
            time_series_limit=2,
        )
    elif tm_name == "arrest_single":
        tm = TensorMap(
            name=tm_name,
            shape=(1,),
            interpretation=Interpretation.CATEGORICAL,
            tensor_from_file=make_general_event_outcome_tensor_from_subevents(
                visit_tm=get_visit_tmap("arrest_first_visit"),
                events_names_list=arrest_list,
                double=False,
            ),
            channel_map={"no_arrest": 0, "arrest": 1},
            path_prefix="edw/*/events/{}/start_date",
            time_series_limit=2,
        )
    return tm

def create_mrn_tmap():
    tmap = TensorMap(
        name="mrn",
        shape=(1,),
        interpretation=Interpretation.LANGUAGE,
        tensor_from_file=mrn_tensor_from_file,
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
        validators=validator_no_empty,
    )
    return tmap

def create_sex_double_tmap():
    tmap = TensorMap(
        name="sex_double",
        interpretation=Interpretation.CATEGORICAL,
        tensor_from_file=sex_double_tensor_from_file,
        channel_map={"male": 0, "female": 1},
        path_prefix=EDW_PREFIX,
        time_series_limit=2,
        validators=validator_not_all_zero,
    )
    return tmap

def create_continuous_tmap(key: str) -> TensorMap:
    tmap = TensorMap(
        name=key,
        shape=(1,),
        interpretation=Interpretation.CONTINUOUS,
        tensor_from_file=make_static_tensor_from_file(key),
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
        validators=validator_no_negative,
    )
    return tmap

def create_age_tmap():
    tmap = TensorMap(
        name="age",
        shape=(1,),
        interpretation=Interpretation.CONTINUOUS,
        tensor_from_file=admin_age_tensor_from_file,
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
        validators=validator_no_negative,
    )
    return tmap

def create_categorical_tmap(key: str, channel_map: Dict[str, int]) -> TensorMap:
    tmap = TensorMap(
        name=key,
        interpretation=Interpretation.CATEGORICAL,
        tensor_from_file=make_static_tensor_from_file(key),
        channel_map=channel_map,
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
        validators=validator_not_all_zero,
    )
    return tmap

def create_static_event_tmap(key: str) -> TensorMap:
    tmap = TensorMap(
        name=key,
        shape=(1,),
        interpretation=Interpretation.EVENT,
        tensor_from_file=make_static_tensor_from_file(key),
        path_prefix=EDW_PREFIX,
        time_series_limit=0,
        validators=validator_no_empty,
    )
    return tmap

def create_first_visit_tmap(tm_name: str, signal_name: str, data_type: str):
    tm = None
    name = signal_name.replace("|_", "")
    if tm_name == f"{name}_first_visit":
        tm = TensorMap(
            name=tm_name,
            shape=(1,),
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=make_first_visit_tensor_from_file(),
            path_prefix=f"edw/*/{data_type}/{signal_name}/start_date",
        )
    return tm

def create_arrest_first_visit_tmap(tm_name: str):
    events_names_list = ["code_start", "rapid_response_start"]
    tm = None
    if tm_name == "arrest_first_visit":
        tm = TensorMap(
            name=tm_name,
            shape=(1,),
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=make_general_first_visit_tensor_from_subset(
                events_names_list,
            ),
            path_prefix="edw/*/events/{}/start_date",
        )
    return tm

def test_explore(
    default_arguments_explore: argparse.Namespace,
    tmpdir_factory,
    utils,
):
    temp_dir = tmpdir_factory.mktemp("explore_tensors")
    default_arguments_explore.tensors = str(temp_dir)
    tmaps = pytest.TMAPS_UP_TO_4D[:]
    tmaps.append(
        TensorMap(
            "scalar",
            shape=(1,),
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=pytest.TFF,
        ),
    )
    explore_expected = utils.build_hd5s(temp_dir, tmaps, n=pytest.N_TENSORS)
    default_arguments_explore.num_workers = 3
    default_arguments_explore.tensor_maps_in = tmaps
    default_arguments_explore.explore_export_fpath = True
    explore(default_arguments_explore)

    csv_path = os.path.join(
        default_arguments_explore.output_folder,
        "tensors_union.csv",
    )
    explore_result = pd.read_csv(csv_path)
    for _, row in explore_result.iterrows():
        for tm in tmaps:
            row_expected = explore_expected[(row["fpath"], tm)]
            if _tmap_requires_modification_for_explore(tm):
                actual = getattr(row, continuous_explore_header(tm) + "_mean")
                assert not np.isnan(actual)
                continue
            if tm.is_continuous:
                actual = getattr(row, continuous_explore_header(tm))
                assert actual == row_expected
                continue
            if tm.is_categorical:
                for channel, idx in tm.channel_map.items():
                    channel_val = getattr(
                        row,
                        categorical_explore_header(tm, channel),
                    )
                    assert channel_val == row_expected[idx]

def create_bedmaster_signal_tmap(
    signal_name: str,
    signal_type: str,
    tmap_name: str,
    field: str,
    interpretation: Interpretation,
    dtype=None,
):
    tmap = TensorMap(
        name=tmap_name,
        shape=(None, None, None),
        interpretation=interpretation,
        tensor_from_file=make_bedmaster_signal_tensor_from_file(field, dtype),
        path_prefix=f"bedmaster/*/{signal_type}/{signal_name}",
    )
    return tmap

def create_bedmaster_signal_metadata_tmap(
    signal_name: str,
    signal_type: str,
    field: str,
    numeric: bool = True,
):
    tmap = TensorMap(
        name=f"{signal_name}_{field}",
        shape=(None,),
        interpretation=(
            Interpretation.CONTINUOUS if numeric else Interpretation.LANGUAGE
        ),
        tensor_from_file=make_bedmaster_metadata_tensor_from_file(field, numeric),
        path_prefix=f"bedmaster/*/{signal_type}/{signal_name}",
    )
    return tmap

def create_sliding_window_outcome_tmap(tmap_name: str) -> Optional[TensorMap]:
    pattern = re.compile(
        r"(\d+)_hrs_sliding_window_(.*)_to_(.*)_(\d+)_hrs_step"
        r"_(\d+)_hrs_prediction_(\d+)_hrs_gap$",
    )
    match = pattern.findall(tmap_name)
    if match:
        window, event_proc_tm_1, event_proc_tm_2, step, prediction, gap = match[0]
        make_tensor_from_file = make_sliding_window_outcome_tensor_from_file
        visit_tm = get_visit_tmap(
            re.sub(r"(end_date|start_date)", "first_visit", event_proc_tm_2),
        )
        event_proc_tm_1 = get_signal_tmap(event_proc_tm_1)
        event_proc_tm_2 = get_signal_tmap(event_proc_tm_2)
        window = int(window)
        step = int(step)
        prediction = int(prediction)
        gap = int(gap)
        name = event_proc_tm_2.name
        return TensorMap(
            name=tmap_name,
            shape=(2,),
            tensor_from_file=make_tensor_from_file(
                window=window,
                step=step,
                prediction=prediction,
                gap=gap,
                event_tm_1=event_proc_tm_1,
                event_tm_2=event_proc_tm_2,
                visit_tm=visit_tm,
            ),
            channel_map={f"no_{name}": 0, name: 1},
            path_prefix=event_proc_tm_2.path_prefix,
            interpretation=Interpretation.CATEGORICAL,
            validators=validator_no_nans,
            time_series_limit=0,
        )
    return None

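
# A hypothetical example (both event names are assumptions; they must resolve
# via get_signal_tmap and end in start_date/end_date so that a matching
# first-visit tmap exists): window=6 h, step=2 h, prediction=12 h, gap=1 h.
def _example_sliding_window_outcome_tmap():
    return create_sliding_window_outcome_tmap(
        "6_hrs_sliding_window_admin_start_date_to_arrest_start_date"
        "_2_hrs_step_12_hrs_prediction_1_hrs_gap",
    )
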
def make_c3po_death_tmap(
    tmap_name: str,
    tmaps: Dict[str, TensorMap],
) -> Dict[str, TensorMap]:
    # Raw string avoids the invalid "\d" escape in a plain string literal
    pattern = r"c3po_death_(\d+)_years_post_ecg"
    years = re.match(pattern, tmap_name)
    if years is None:
        return tmaps
    tmaps[tmap_name] = TensorMap(
        name=tmap_name,
        channel_map={"no_death": 0, "death": 1},
        interpretation=Interpretation.CATEGORICAL,
        tensor_from_file=make_tensor_from_file_c3po_death(years=int(years[1])),
        validators=validator_not_all_zero,
        time_series_limit=0,
        path_prefix=C3PO_PREFIX,
    )
    return tmaps

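
# A minimal usage sketch based on the pattern above: a name like
# "c3po_death_5_years_post_ecg" yields a binary death-within-5-years label.
def _example_c3po_death_tmap() -> Dict[str, TensorMap]:
    return make_c3po_death_tmap("c3po_death_5_years_post_ecg", {})
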
    channel_map = dict()
    for idx, value in enumerate(values):
        channel_map[f"{feature}_{value}"] = idx
    return channel_map


# Categorical (non-binary)
for tmap_name in sts_features_categorical:
    tff = _make_sts_tff_categorical(key=tmap_name)
    channel_map = _make_sts_categorical_channel_map(feature=tmap_name)
    tmaps[tmap_name] = TensorMap(
        name=tmap_name,
        interpretation=Interpretation.CATEGORICAL,
        path_prefix=STS_PREFIX,
        tensor_from_file=tff,
        channel_map=channel_map,
        validators=validator_not_all_zero,
        time_series_limit=0,
        time_series_filter=get_sts_surgery_dates,
    )

# Binary
for tmap_name in sts_features_binary:
    tff = _make_sts_tff_binary(
        key=tmap_name,
        negative_value=2,
        positive_value=1,
    )
    channel_map = outcome_channels(tmap_name)
    tmaps[tmap_name] = TensorMap(
def update_tmaps_window(
    tmap_name: str,
    tmaps: Dict[str, TensorMap],
) -> Dict[str, TensorMap]:
    """
    Make new tensor map from base tensor map, making conditional on a date
    from another source of data. This requires a precise format for the
    tensor map name:
        [base_tmap_name]_[N]_days_[pre/post]_[other_data_source]
    e.g.
        ecg_2500_365_days_pre_echo
        ecg_2500_365_days_pre_sts_newest
        av_peak_gradient_30_days_post_echo
    or
        [base_tmap_name]_[N]_to_[N]_days_[pre/post]_[other_data_source]
    e.g.
        ecg_2500_30_to_90_days_pre_sts

    Additionally, a special tensor map can be created to get the days between
    cross referenced events by the following format:
        [source_name]_[N]_days_[pre/post]_[other_data_source]_days_between_matched_events
    e.g.
        ecg_180_days_pre_echo_days_between_matched_events
    """
    pattern_string = (
        fr"(.*?)_(\d+_to_)?(\d+)_days_(pre|post)_({'|'.join(CROSS_REFERENCE_SOURCES)})"
        fr"(_days_between_matched_events)?"
    )
    pattern = re.compile(pattern_string)
    match = pattern.match(tmap_name)
    if match is None:
        return tmaps

    # fmt: off
    # ecg_2500_std_30_to_180_days_pre_echo
    source_name = match[1]            # ecg_2500_std
    if match[2] is not None:          # "30_to_"
        offset_start = int(match[2].replace("_to_", ""))
    else:
        offset_start = ""
    offset_end = int(match[3])        # 180
    pre_or_post = match[4]            # pre
    reference_name = match[5]         # echo
    days_between = match[6] or ""     # (empty string)
    # fmt: on

    offset_start_str = "" if offset_start == "" else f"{offset_start}_to_"
    new_name = (
        f"{source_name}_{offset_start_str}{offset_end}_days"
        f"_{pre_or_post}_{reference_name}{days_between}"
    )

    # If the tmap should return the number of days between matched events,
    # source_name is the name of a source dataset
    if days_between:
        if source_name not in CROSS_REFERENCE_SOURCES:
            raise ValueError(
                f"Source dataset {source_name} not in known cross reference sources; "
                f"cannot create {new_name}",
            )
        source_prefix, source_dt_col = _get_dataset_metadata(
            dataset_name=source_name,
        )

        # Setup time series filter, using the default time series filter if the
        # source datetime column is None
        if source_dt_col is not None:
            time_series_filter = lambda data: data[source_prefix][source_dt_col]
        else:
            time_series_filter = make_default_time_series_filter(source_prefix)

        # Create a fake base tmap which will be modified with a time series
        # filter function which returns the number of days between events
        base_tmap = TensorMap(
            name=source_name,
            shape=(1,),
            interpretation=Interpretation.CONTINUOUS,
            path_prefix=source_prefix,
            tensor_from_file=_days_between_tensor_from_file,
            time_series_limit=0,
            time_series_filter=time_series_filter,
        )

    # If not getting days between events, source_name is the name of an
    # underlying tmap to filter and must exist
    elif source_name not in tmaps:
        raise ValueError(
            f"Base tmap {source_name} not in existing tmaps; cannot create {new_name}",
        )

    # If all checks pass, get base_tmap in the case that it is an existing tmap
    else:
        base_tmap = tmaps[source_name]

    # Copy the base_tmap to modify, either a real tmap or the fake one setup to
    # get the days between events
    new_tmap = copy.deepcopy(base_tmap)

    reference_prefix, reference_dt_col = _get_dataset_metadata(
        dataset_name=reference_name,
    )

    # One-to-one matching algorithm maximizes the number of matches by pairing
    # events nearest in time, starting from the most recent event.
    # 1. Sort source dates from newest -> oldest
    # 2. Sort reference dates from newest -> oldest
    # 3. For each reference date, starting from the newest reference date
    #    a. Compute relative time window
    #    b. Take the newest source date in range
    def get_cross_referenced_dates(data: PatientData) -> Dates:
        source_dates = base_tmap.time_series_filter(data)

        # Get dates from reference data
        reference_data = data[reference_prefix]
        if isinstance(reference_data, pd.DataFrame):
            reference_dates = reference_data[reference_dt_col]  # Reference data is CSV
        else:
            reference_dates = list(reference_data)  # Reference data is HD5

        # Convert everything to pd.Series of pd.Timestamp
        source_is_list = isinstance(source_dates, list)
        source_dates = pd.Series(source_dates).sort_values(ascending=False)
        source_dates_dt = pd.to_datetime(source_dates)
        reference_dates = pd.Series(reference_dates).sort_values(ascending=False)
        reference_dates = pd.to_datetime(reference_dates)

        # Set start and end dates relative to an event
        if pre_or_post == "pre":
            # e.g. 30_days_pre_echo
            if offset_start == "":
                start_dates = reference_dates + datetime.timedelta(
                    days=offset_end * -1,
                )
                end_dates = reference_dates
            # e.g. 60_to_30_days_pre_echo
            else:
                start_dates = reference_dates + datetime.timedelta(
                    days=offset_start * -1,
                )
                end_dates = reference_dates + datetime.timedelta(
                    days=offset_end * -1,
                )
        else:
            # e.g. 30_days_post_echo
            if offset_start == "":
                start_dates = reference_dates
                end_dates = reference_dates + datetime.timedelta(days=offset_end)
            # e.g. 30_to_60_days_post_echo
            else:
                start_dates = reference_dates + datetime.timedelta(
                    days=offset_start,
                )
                end_dates = reference_dates + datetime.timedelta(days=offset_end)

        dates = pd.Series(dtype=object)
        day_differences = pd.Series(dtype=object)
        for start_date, end_date, reference_date in zip(
            start_dates,
            end_dates,
            reference_dates,
        ):
            # Get newest source date in range of start and end dates
            matched_date = source_dates_dt[
                source_dates_dt.between(start_date, end_date, inclusive=False)
            ][:1]

            # If computing the days between events, calculate the day difference
            # between the reference date and the matched date
            if days_between:
                difference = reference_date - matched_date
                difference = difference.dt.total_seconds() / SECONDS_IN_DAY
                day_differences = day_differences.append(difference)

            # If not computing the days between events, return the actual dates
            else:
                # Computation is done on pd.Timestamp objects but the returned
                # list should use the original strings/format in source_dates
                dates = dates.append(source_dates[matched_date.index])

            # Remove the matched date from further matching
            source_dates_dt = source_dates_dt.drop(matched_date.index)

        if len(dates) == 0 and len(day_differences) == 0:
            raise ValueError("No cross referenced dates")

        if days_between:
            return day_differences
        elif source_is_list:
            return list(dates)
        else:
            return dates

    new_tmap.time_series_filter = get_cross_referenced_dates
    new_tmap.name = new_name
    tmaps[new_name] = new_tmap
    return tmaps

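
# A minimal usage sketch taken from the docstring examples above; it assumes a
# base tmap named "ecg_2500" is already registered in `tmaps`.
def _example_update_tmaps_window(tmaps: Dict[str, TensorMap]) -> Dict[str, TensorMap]:
    # Restrict ecg_2500 to ECGs acquired within 365 days before an echo
    tmaps = update_tmaps_window("ecg_2500_365_days_pre_echo", tmaps)
    # Days between each matched ECG and its reference echo
    tmaps = update_tmaps_window(
        "ecg_180_days_pre_echo_days_between_matched_events",
        tmaps,
    )
    return tmaps
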
def test_tensor_map_equality():
    tensor_map_1a = TensorMap(
        name="tm",
        loss="logcosh",
        channel_map={"c1": 1, "c2": 2},
        metrics=[],
        tensor_from_file=pytest.TFF,
    )
    tensor_map_1b = TensorMap(
        name="tm",
        loss="logcosh",
        channel_map={"c1": 1, "c2": 2},
        metrics=[],
        tensor_from_file=pytest.TFF,
    )
    tensor_map_2a = TensorMap(
        name="tm",
        loss=logcosh,
        channel_map={"c1": 1, "c2": 2},
        metrics=[],
        tensor_from_file=pytest.TFF,
    )
    tensor_map_2b = TensorMap(
        name="tm",
        loss=logcosh,
        channel_map={"c2": 2, "c1": 1},
        metrics=[],
        tensor_from_file=pytest.TFF,
    )
    tensor_map_3 = TensorMap(
        name="tm",
        loss=logcosh,
        channel_map={"c1": 1, "c2": 3},
        metrics=[],
        tensor_from_file=pytest.TFF,
    )
    tensor_map_4 = TensorMap(
        name="tm",
        loss=logcosh,
        channel_map={"c1": 1, "c2": 3},
        metrics=[all],
        tensor_from_file=pytest.TFF,
    )
    tensor_map_5a = TensorMap(
        name="tm",
        loss=logcosh,
        channel_map={"c1": 1, "c2": 3},
        metrics=[all, any],
        tensor_from_file=pytest.TFF,
    )
    tensor_map_5b = TensorMap(
        name="tm",
        loss=logcosh,
        channel_map={"c1": 1, "c2": 3},
        metrics=[any, all],
        tensor_from_file=pytest.TFF,
    )

    # Equality is insensitive to channel_map and metrics ordering, and to
    # whether the loss is given as a string name or the function itself
    assert tensor_map_1a == tensor_map_1b
    assert tensor_map_2a == tensor_map_2b
    assert tensor_map_1a == tensor_map_2a
    assert tensor_map_5a == tensor_map_5b

    # Differing channel indices or metric sets break equality
    assert tensor_map_2a != tensor_map_3
    assert tensor_map_3 != tensor_map_4
    assert tensor_map_3 != tensor_map_5a
    assert tensor_map_4 != tensor_map_5a

        if standardize == "_scaled":
            normalizer = RobustScalePopulation(
                median=echo_measures_continuous[tmap_name]["median"],
                iqr=echo_measures_continuous[tmap_name]["iqr"],
            )
        else:
            normalizer = None
        tmaps[tmap_name + standardize] = TensorMap(
            name=tmap_name + standardize,
            shape=(1,),
            interpretation=Interpretation.CONTINUOUS,
            path_prefix=ECHO_PREFIX,
            tensor_from_file=_make_echo_tff_continuous(key=tmap_key),
            validators=RangeValidator(
                minimum=echo_measures_continuous[tmap_name]["min"],
                maximum=echo_measures_continuous[tmap_name]["max"],
            ),
            normalizers=normalizer,
            time_series_limit=0,
            time_series_filter=get_echo_dates,
        )

tmap_name = "echo_datetime"
tmaps[tmap_name] = TensorMap(
    name=tmap_name,
    shape=(1,),
    interpretation=Interpretation.LANGUAGE,
    path_prefix=ECHO_PREFIX,
    tensor_from_file=_make_echo_tff_continuous(key=ECHO_DATETIME_COLUMN),
    validators=validator_no_nans,
def create_static_around_tmap(tm_name: str):
    pattern = re.compile(r"^age_(.*)_(single|double)$")
    match = pattern.findall(tm_name)
    if match:
        event_proc, samples = match[0]
        visit_tm = get_visit_tmap(
            event_proc.replace("end_date", "first_visit").replace(
                "start_date", "first_visit",
            ),
        )
        samples = 1 if samples == "single" else 2
        return TensorMap(
            name=tm_name,
            shape=(samples,),
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=admin_age_event_visit_tensor_from_file(
                visit_tm=visit_tm,
                samples=samples,
            ),
            path_prefix="edw/*",
        )

    pattern = re.compile(
        r"^(age|length_of_stay)_(\d+)_hrs_sliding_window_(.*)"
        r"_to_(.*)_(\d+)_hrs_step$",
    )
    match = pattern.findall(tm_name)
    if match:
        signal, window, event_proc_tm_1, event_proc_tm_2, step = match[0]
        visit_tm = get_visit_tmap(
            event_proc_tm_1.replace("end_date", "first_visit").replace(
                "start_date", "first_visit",
            ),
        )
        event_proc_tm_1 = get_signal_tmap(event_proc_tm_1)
        event_proc_tm_2 = get_signal_tmap(event_proc_tm_2)
        window = int(window)
        step = int(step)
        if signal == "age":
            tensor_from_file = admin_age_event_visit_tensor_from_file
        else:
            tensor_from_file = length_of_stay_sliding_window_tensor_from_file
        return TensorMap(
            name=tm_name,
            shape=(1,),
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=tensor_from_file(
                visit_tm=visit_tm,
                window=window,
                step=step,
                event_tm_1=event_proc_tm_1,
                event_tm_2=event_proc_tm_2,
            ),
            path_prefix="edw/*",
        )

    pattern = re.compile(r"^length_of_stay_(\d+)_hrs_(pre|post)_(.*)$")
    match = pattern.findall(tm_name)
    if match:
        time, period, event_proc_tm = match[0]
        visit_tm = get_visit_tmap(
            event_proc_tm.replace("end_date", "first_visit").replace(
                "start_date", "first_visit",
            ),
        )
        hrs_to_event = [int(time)]
        periods = [period]
    else:
        pattern = re.compile(
            r"^length_of_stay_(\d+)_hrs_(pre|post)_(.*)"
            r"_(\d+)_hrs_(pre|post)_(.*)$",
        )
        match = pattern.findall(tm_name)
        if match:
            time_1, period_1, time_2, period_2, event_proc_tm = match[0]
            hrs_to_event = [int(time_1), int(time_2)]
            periods = [period_1, period_2]
    if match:
        visit_tm = get_visit_tmap(
            event_proc_tm.replace("end_date", "first_visit").replace(
                "start_date", "first_visit",
            ),
        )
        event_proc_tm = get_signal_tmap(event_proc_tm)
        return TensorMap(
            name=tm_name,
            shape=(1,),
            interpretation=Interpretation.CONTINUOUS,
            tensor_from_file=length_of_stay_event_tensor_from_file(
                visit_tm=visit_tm,
                event_tm=event_proc_tm,
                hrs_to_event=hrs_to_event,
                periods=periods,
            ),
            path_prefix="edw/*",
        )
    return None

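
# Hypothetical name examples (the event names are assumptions that must
# resolve via get_visit_tmap / get_signal_tmap):
def _example_static_around_tmaps():
    age = create_static_around_tmap("age_arrest_start_date_single")
    los = create_static_around_tmap("length_of_stay_24_hrs_pre_arrest_start_date")
    return age, los
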
def create_sliding_window_tmap(tmap_name: str) -> Optional[TensorMap]:
    imputation_type = None
    feature = "raw"
    tmap_match_name = tmap_name

    # Strip an optional imputation suffix
    pattern = re.compile(r"^(.*)_(mean_imputation)$")
    match = pattern.findall(tmap_match_name)
    if match:
        _, imputation_type = match[0]
        tmap_match_name = tmap_match_name.replace(f"_{imputation_type}", "")

    # Strip an optional feature-statistic suffix
    pattern = re.compile(fr"^(.*)_({FEATURES})$")
    match = pattern.findall(tmap_match_name)
    if match:
        _, feature = match[0]
        tmap_match_name = tmap_match_name.replace(f"_{feature}", "")

    pattern = re.compile(
        r"^(.*)_(\d+)_hrs_sliding_window_(.*)_to_(.*)_(\d+)_hrs_step$",
    )
    match = pattern.findall(tmap_match_name)
    if not match:
        return None
    signal_tm, window, event_proc_tm_1, event_proc_tm_2, step = match[0]
    make_tensor_from_file = make_sliding_window_tensor_from_file

    if feature == "raw":
        shape = (None, None)
    else:
        shape = (None,)
    if signal_tm.endswith("_timeseries"):
        shape += (2,)

    time_tm = get_time_tm(signal_tm)
    visit_tm = get_visit_tmap(
        re.sub(r"(end_date|start_date)", "first_visit", event_proc_tm_2),
    )
    signal_tm = _get_tmap(signal_tm)
    event_proc_tm_1 = get_signal_tmap(event_proc_tm_1)
    event_proc_tm_2 = get_signal_tmap(event_proc_tm_2)
    window = int(window)
    step = int(step)
    return TensorMap(
        name=tmap_name,
        shape=shape,
        tensor_from_file=make_tensor_from_file(
            window=window,
            step=step,
            event_tm_1=event_proc_tm_1,
            event_tm_2=event_proc_tm_2,
            visit_tm=visit_tm,
            signal_tm=signal_tm,
            signal_time_tm=time_tm,
            feature=feature,
            imputation_type=imputation_type,
        ),
        channel_map=signal_tm.channel_map,
        path_prefix=signal_tm.path_prefix,
        interpretation=signal_tm.interpretation,
        validators=validator_no_nans,
        time_series_limit=0,
    )

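
# A hypothetical call (the signal and event names are assumptions that must
# resolve via _get_tmap / get_signal_tmap); a feature suffix from FEATURES or
# "_mean_imputation" may additionally be appended to the name.
def _example_sliding_window_tmap():
    return create_sliding_window_tmap(
        "pulse_6_hrs_sliding_window_admin_start_date_to_arrest_start_date_2_hrs_step",
    )
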
def create_around_tmap(tmap_name: str) -> Optional[TensorMap]:
    time_2 = None
    period_2 = None
    event_proc_tm_2 = None
    imputation_type = None
    feature = "raw"
    shape: Optional[Tuple[Axis, ...]] = None
    tmap_match_name = tmap_name

    if tmap_name.endswith("_explore"):
        return None

    # Strip an optional imputation suffix
    pattern = re.compile(r"^(.*)_(mean_imputation|sample_and_hold)$")
    match = pattern.findall(tmap_match_name)
    if match:
        _, imputation_type = match[0]
        tmap_match_name = tmap_match_name.replace(f"_{imputation_type}", "")

    # Strip an optional feature-statistic suffix
    pattern = re.compile(fr"^(.*)_({FEATURES})$")
    match = pattern.findall(tmap_match_name)
    if match:
        _, feature = match[0]
        tmap_match_name = tmap_match_name.replace(f"_{feature}", "")

    # Two reference events
    pattern = re.compile(
        r"^(.*)_(\d+)_hrs_(pre|post)_(.*)_(\d+)_hrs_(pre|post)_(.*)"
        r"_(\d+)_hrs_window$",
    )
    match = pattern.findall(tmap_match_name)
    if match:
        (
            signal_tm,
            time_1,
            period_1,
            event_proc_tm,
            time_2,
            period_2,
            event_proc_tm_2,
            window,
        ) = match[0]
        make_tensor_from_file = make_around_event_tensor_from_file
        times = [int(time_1), int(time_2)]
        periods = [period_1, period_2]
    # One reference event
    if not match:
        pattern = re.compile(
            r"^(.*)_(\d+)_hrs_(pre|post)_(.*)_(\d+)_hrs_window$",
        )
        match = pattern.findall(tmap_match_name)
        if match:
            signal_tm, time, period, event_proc_tm, window = match[0]
            make_tensor_from_file = make_around_event_tensor_from_file
            times = [int(time)]
            periods = [period]
    if not match:
        return None

    if signal_tm.endswith("_timeseries"):
        shape = (None, 2) if feature == "raw" else (2,)
    else:
        shape = (None,) if feature == "raw" else (1,)

    time_tm = get_time_tm(signal_tm)
    visit_tm = get_visit_tmap(
        re.sub(r"(end_date|start_date)", "first_visit", event_proc_tm),
    )
    window = int(window)
    signal_tm = _get_tmap(signal_tm)
    event_proc_tms = [get_signal_tmap(event_proc_tm)]
    if event_proc_tm_2 is not None:
        event_proc_tms.append(get_signal_tmap(event_proc_tm_2))
    return TensorMap(
        name=tmap_name,
        shape=shape,
        tensor_from_file=make_tensor_from_file(
            times=times,
            periods=periods,
            window=window,
            feature=feature,
            event_tms=event_proc_tms,
            visit_tm=visit_tm,
            signal_tm=signal_tm,
            signal_time_tm=time_tm,
            imputation_type=imputation_type,
        ),
        channel_map=signal_tm.channel_map,
        path_prefix=signal_tm.path_prefix,
        interpretation=signal_tm.interpretation,
        validators=validator_no_nans,
    )

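
# A hypothetical name this parser would accept (signal/event tmaps are
# assumptions): a raw 6-hour window anchored 12 hours before an event,
# optionally suffixed with a FEATURES statistic and/or "_mean_imputation" /
# "_sample_and_hold".
def _example_around_tmap():
    return create_around_tmap("pulse_12_hrs_pre_arrest_start_date_6_hrs_window")
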