def tensor_from_file(tm, data):
    """Encode the stored length of one ECG lead as a one-hot channel vector.

    For every ECG date returned by ``tm.time_series_filter(data)`` the
    ``"len"`` attribute of the lead's dataset is read, prefixed with
    ``channel_prefix`` and compared case-insensitively against the keys of
    ``tm.channel_map``. The first matching channel is set to 1.0; when no
    key matches, the ``channel_unknown`` channel is set instead.

    ``lead``, ``channel_prefix`` and ``channel_unknown`` come from the
    enclosing scope, which is not visible in this block.

    Args:
        tm: Tensor map exposing ``time_series_filter`` and ``channel_map``.
        data: Mapping from HD5 paths to datasets carrying ``attrs``; also
            exposes ``id`` for log messages.

    Returns:
        Float array with the shape reported by ``is_dynamic_shape``. ECGs
        whose lookup raises ``KeyError`` are left as zeros.
    """
    ecg_dates = tm.time_series_filter(data)
    dynamic, shape = is_dynamic_shape(tm, len(ecg_dates))
    one_hot = np.zeros(shape, dtype=float)

    for row, ecg_date in enumerate(ecg_dates):
        hd5_path = make_hd5_path(tm, ecg_date, lead)
        try:
            raw_length = data[hd5_path].attrs["len"]
            label = f"{channel_prefix}{raw_length}".lower()
            # First channel whose name matches case-insensitively, else the
            # catch-all channel.
            channel = next(
                (name for name in tm.channel_map if label == name.lower()),
                channel_unknown,
            )
            channel_idx = tm.channel_map[channel]
            if dynamic:
                one_hot[(row, channel_idx)] = 1.0
            else:
                one_hot[(channel_idx,)] = 1.0
        except KeyError:
            logging.debug(
                f"Could not get voltage length for lead {lead} from ECG on"
                f" {ecg_date} in {data.id}",
            )
    return one_hot
def sampling_frequency_from_file(tm, data):
    """Derive a sampling frequency per ECG from a lead's stored length.

    The frequency is the lead's ``"len"`` attribute divided by ``duration``.
    For categorical tensor maps the value is prefixed with
    ``channel_prefix`` and one-hot encoded against ``tm.channel_map``
    (case-insensitive), falling back to ``channel_unknown``. Otherwise the
    numeric value is written to ``tensor[i]``.

    ``lead``, ``duration``, ``fill``, ``channel_prefix`` and
    ``channel_unknown`` come from the enclosing scope, which is not visible
    in this block.

    Args:
        tm: Tensor map exposing ``time_series_filter``, ``interpretation``
            and ``channel_map``.
        data: Mapping from HD5 paths to datasets carrying ``attrs``; also
            exposes ``id`` for log messages.

    Returns:
        ``float32`` array with the shape reported by ``is_dynamic_shape``.
        ECGs whose length cannot be read keep their initial value (zeros
        for categorical maps, ``fill`` otherwise).
    """
    ecg_dates = tm.time_series_filter(data)
    dynamic, shape = is_dynamic_shape(tm, len(ecg_dates))
    categorical = tm.interpretation == Interpretation.CATEGORICAL
    if categorical:
        tensor = np.zeros(shape, dtype=np.float32)
    else:
        tensor = np.full(shape, fill, dtype=np.float32)

    for i, ecg_date in enumerate(ecg_dates):
        path = make_hd5_path(tm, ecg_date, lead)
        try:
            # The dataset/attribute lookup is the statement most likely to
            # raise KeyError, so it must live inside the handler that logs
            # it; otherwise a single missing lead aborts the whole call.
            lead_length = data[path].attrs["len"]
            sampling_frequency = lead_length / duration

            if not categorical:
                tensor[i] = sampling_frequency
                continue

            label = f"{channel_prefix}{sampling_frequency}".lower()
            channel = next(
                (cm for cm in tm.channel_map if label == cm.lower()),
                channel_unknown,
            )
            channel_idx = tm.channel_map[channel]
            slices = (i, channel_idx) if dynamic else (channel_idx,)
            tensor[slices] = 1.0
        except (KeyError, ValueError):
            logging.debug(
                f"Could not calculate sampling frequency from ECG on {ecg_date} in"
                f" {data.id}",
            )
    return tensor
def tensor_from_file(tm, data):
    """Label a sample from the combined text of all its ECG reads.

    The text stored under every key in ``keys`` for every ECG date is
    concatenated and lower-cased. Channels of ``tm.channel_map`` are then
    visited in ascending index order; the first channel that has an entry
    in ``channel_terms`` with at least one term matching (``re.search``) is
    set to 1. If there is text but no channel matches, the
    ``not_found_channel`` channel is set. With no text at all the tensor
    stays all zeros.

    ``keys``, ``channel_terms`` and ``not_found_channel`` come from the
    enclosing scope, which is not visible in this block.

    Args:
        tm: Tensor map exposing ``time_series_filter``, ``shape`` and
            ``channel_map``; its ``time_series_limit`` attribute is
            temporarily set to 0 and then reset to ``None``.
        data: Mapping from HD5 paths to datasets; supports ``in``.

    Returns:
        ``float32`` array of shape ``tm.shape``.
    """
    # get all the ecgs in range (time series lookup is set)
    tm.time_series_limit = 0
    try:
        ecg_dates = tm.time_series_filter(data)
    finally:
        # Always undo the temporary override, even if the filter raises,
        # so later uses of this tensor map do not inherit the limit.
        tm.time_series_limit = None

    tensor = np.zeros(tm.shape, dtype=np.float32)

    # NOTE(review): the flattened source does not show whether the
    # classification ran inside the per-ECG loop; this follows the reading
    # where it runs once over the text of all ECGs. Confirm against history.
    fragments = []
    for ecg_date in ecg_dates:
        for key in keys:
            path = make_hd5_path(tm, ecg_date, key)
            if path in data:
                fragments.append(data[path][()])
    read = "".join(fragments).lower()

    if read == "":
        return tensor

    channels_by_index = sorted(tm.channel_map.items(), key=lambda cm: cm[1])
    for channel, channel_idx in channels_by_index:
        if channel not in channel_terms:
            continue
        if any(
            re.search(term.lower(), read) is not None
            for term in channel_terms[channel]
        ):
            tensor[channel_idx] = 1
            break
    else:
        # Text was present but no channel's terms matched.
        tensor[tm.channel_map[not_found_channel]] = 1
    return tensor
def tensor_from_file(tm, data):
    """Label each ECG from the text of its own reads.

    For every ECG date the text stored under each key in ``keys`` is
    concatenated and lower-cased. Channels of ``tm.channel_map`` are visited
    in ascending index order; the first one that has an entry in
    ``channel_terms`` with a term matching via ``re.search`` is set to 1 for
    that ECG. When none matches, the ``not_found_channel`` channel is set.

    ``keys``, ``channel_terms`` and ``not_found_channel`` come from the
    enclosing scope, which is not visible in this block.

    Args:
        tm: Tensor map exposing ``time_series_filter`` and ``channel_map``.
        data: Mapping from HD5 paths to datasets; supports ``in``.

    Returns:
        ``float32`` array with the shape reported by ``is_dynamic_shape``.
    """
    ecg_dates = tm.time_series_filter(data)
    dynamic, shape = is_dynamic_shape(tm, len(ecg_dates))
    labels = np.zeros(shape, dtype=np.float32)

    for row, ecg_date in enumerate(ecg_dates):
        text = ""
        for key in keys:
            hd5_path = make_hd5_path(tm, ecg_date, key)
            if path_present := (hd5_path in data):
                text += data[hd5_path][()]
        text = text.lower()

        ordered_channels = sorted(
            tm.channel_map.items(),
            key=lambda item: item[1],
        )
        for name, idx in ordered_channels:
            if name not in channel_terms:
                continue
            matches = (
                re.search(term.lower(), text) is not None
                for term in channel_terms[name]
            )
            if any(matches):
                target = (row, idx) if dynamic else (idx,)
                labels[target] = 1
                break
        else:
            # No channel's terms were found in this ECG's text.
            fallback_idx = tm.channel_map[not_found_channel]
            target = (row, fallback_idx) if dynamic else (fallback_idx,)
            labels[target] = 1
    return labels
def tensor_from_file(tm, data):
    """Load per-lead voltage arrays into a tensor, one channel per lead.

    For every ECG date and every lead in ``tm.channel_map`` the voltage
    dataset is read, passed through ``_resample_voltage`` to
    ``voltage_length`` samples, and written into that lead's channel.
    Leads that cannot be read or resampled are logged and left as zeros.

    ``no_pacemaker``, ``pacemaker_tm``, ``conv_2d`` and ``exact_length``
    come from the enclosing scope, which is not visible in this block:

    * ``no_pacemaker``: ECGs where ``pacemaker_tensor[i, 1] == 1`` are
      skipped (presumably column 1 marks "pacemaker present"; confirm
      against ``pacemaker_tm``'s channel map).
    * ``conv_2d``: the last dimension of the shape is dropped while filling
      and a trailing axis of size 1 is appended to the result.
    * ``exact_length``: a lead whose stored length differs from
      ``voltage_length`` is rejected instead of resampled.

    Args:
        tm: Tensor map exposing ``time_series_filter`` and ``channel_map``.
        data: Mapping from HD5 paths to datasets; also exposes ``id`` for
            log messages.

    Returns:
        ``float32`` array with the shape reported by ``is_dynamic_shape``
        (after the ``conv_2d`` adjustment described above).
    """
    # Value passed as ``fs`` when the stored sample base is unreadable.
    default_fs = 250

    ecg_dates = tm.time_series_filter(data)
    if no_pacemaker:
        pacemaker_tm.time_series_filter = tm.time_series_filter
        pacemaker_tensor = pacemaker_tm.tensor_from_file(pacemaker_tm, data)

    dynamic, shape = is_dynamic_shape(tm, len(ecg_dates))
    if conv_2d:
        shape = shape[:-1]
    voltage_length = shape[1] if dynamic else shape[0]
    tensor = np.zeros(shape, dtype=np.float32)

    for i, ecg_date in enumerate(ecg_dates):
        if no_pacemaker and pacemaker_tensor[i, 1] == 1:
            continue
        for cm in tm.channel_map:
            try:
                path = make_hd5_path(tm=tm, date_key=ecg_date, value_key=cm)
                voltage = data[path][()]
                path_waveform_samplebase = make_hd5_path(
                    tm=tm,
                    date_key=ecg_date,
                    value_key="waveform_samplebase",
                )
                try:
                    fs = float(data[path_waveform_samplebase][()])
                except Exception:
                    # Best-effort fallback, but no longer a bare ``except``:
                    # KeyboardInterrupt/SystemExit must still propagate.
                    fs = default_fs

                # Explicit check instead of ``assert`` so the validation is
                # not stripped when Python runs with -O.
                if exact_length and len(voltage) != voltage_length:
                    raise ValueError(
                        f"Expected {voltage_length} samples, got {len(voltage)}",
                    )

                voltage = _resample_voltage(
                    voltage=voltage,
                    desired_samples=voltage_length,
                    fs=fs,
                )
                slices = (
                    (i, ..., tm.channel_map[cm])
                    if dynamic
                    else (..., tm.channel_map[cm])
                )
                tensor[slices] = voltage
            except (KeyError, AssertionError, ValueError):
                # AssertionError stays in the tuple in case helpers assert.
                logging.debug(
                    f"Could not get voltage for lead {cm} with {voltage_length}"
                    f" samples in {data.id}",
                )

    if conv_2d:
        tensor = tensor[..., None]
    return tensor
def voltage_zeros(tm, data):
    """Count the zero-valued samples in each lead of each ECG.

    Args:
        tm: Tensor map exposing ``time_series_filter`` and ``channel_map``
            (lead name -> channel index).
        data: Mapping from HD5 paths to voltage datasets.

    Returns:
        ``float32`` array with the shape reported by ``is_dynamic_shape``,
        holding one zero-sample count per lead.

    Raises:
        KeyError: Propagated from ``data`` when a lead's path is missing;
            nothing is caught here.
    """
    ecg_dates = tm.time_series_filter(data)
    dynamic, shape = is_dynamic_shape(tm, len(ecg_dates))
    zero_counts = np.zeros(shape, dtype=np.float32)

    for row, ecg_date in enumerate(ecg_dates):
        for lead_name in tm.channel_map:
            samples = data[make_hd5_path(tm, ecg_date, lead_name)][()]
            lead_idx = tm.channel_map[lead_name]
            if dynamic:
                target = (row, lead_idx)
            else:
                target = (lead_idx,)
            zero_counts[target] = np.count_nonzero(samples == 0)
    return zero_counts
def ecg_acquisition_year(tm, data):
    """Extract the acquisition year of each ECG.

    The ``"acquisitiondate"`` entry of every ECG is parsed with
    ``_ecg_str2date`` and its ``year`` is stored at index ``i`` of the
    output. The ``dynamic`` flag from ``is_dynamic_shape`` is not consulted;
    only the shape is used.

    Args:
        tm: Tensor map exposing ``time_series_filter``.
        data: Mapping from HD5 paths to datasets.

    Returns:
        Integer array with the shape reported by ``is_dynamic_shape``.
        Entries whose lookup raises ``KeyError`` stay 0.
    """
    ecg_dates = tm.time_series_filter(data)
    _, shape = is_dynamic_shape(tm, len(ecg_dates))
    years = np.zeros(shape, dtype=int)

    for position, ecg_date in enumerate(ecg_dates):
        date_path = make_hd5_path(tm, ecg_date, "acquisitiondate")
        try:
            raw_date = data[date_path][()]
            parsed = _ecg_str2date(raw_date)
            years[position] = parsed.year
        except KeyError:
            # Missing acquisition date: leave the default of 0.
            continue
    return years
def tensor_from_file(tm, data):
    """Read one stored attribute per lead for each ECG.

    For every lead in ``tm.channel_map`` the attribute named ``volt_attr``
    (taken from the enclosing scope, not visible in this block) is read
    from the lead's dataset and written into that lead's channel.

    Args:
        tm: Tensor map exposing ``time_series_filter`` and ``channel_map``.
        data: Mapping from HD5 paths to datasets carrying ``attrs``.

    Returns:
        ``float32`` array with the shape reported by ``is_dynamic_shape``.
        Leads whose lookup raises ``KeyError`` are left as zeros.
    """
    ecg_dates = tm.time_series_filter(data)
    dynamic, shape = is_dynamic_shape(tm, len(ecg_dates))
    values = np.zeros(shape, dtype=np.float32)

    for row, ecg_date in enumerate(ecg_dates):
        for lead_name in tm.channel_map:
            try:
                hd5_path = make_hd5_path(tm, ecg_date, lead_name)
                lead_idx = tm.channel_map[lead_name]
                if dynamic:
                    target = (row, lead_idx)
                else:
                    target = (lead_idx,)
                values[target] = data[hd5_path].attrs[volt_attr]
            except KeyError:
                # Missing dataset or attribute: keep the zero default.
                continue
    return values
def get_ecg_tensor(tm, data):
    """Read one stored value per ECG and encode it per ``tm.interpretation``.

    * LANGUAGE: values go into an object array (initially empty strings)
      that is converted to ``str`` dtype before returning.
    * CONTINUOUS: values go into a float array initialised to ``fill``.
    * CATEGORICAL: the value is prefixed with ``channel_prefix`` and matched
      case-insensitively against ``tm.channel_map`` keys; the first match
      (or ``channel_unknown`` when none matches) is set to 1.0.

    ``key``, ``fill``, ``channel_prefix`` and ``channel_unknown`` come from
    the enclosing scope, which is not visible in this block.

    Args:
        tm: Tensor map exposing ``time_series_filter``, ``interpretation``,
            ``channel_map`` and ``name``.
        data: Mapping from HD5 paths to datasets; also exposes ``id`` for
            log messages.

    Returns:
        Array with the shape reported by ``is_dynamic_shape``. ECGs whose
        value raises ``KeyError`` or ``ValueError`` keep their initial value.

    Raises:
        NotImplementedError: If ``tm.interpretation`` is none of the three
            interpretations listed above.
    """
    ecg_dates = tm.time_series_filter(data)
    dynamic, shape = is_dynamic_shape(tm, len(ecg_dates))

    if tm.interpretation == Interpretation.LANGUAGE:
        tensor = np.full(shape, "", dtype=object)
    elif tm.interpretation == Interpretation.CONTINUOUS:
        if fill == 0:
            tensor = np.zeros(shape, dtype=float)
        else:
            tensor = np.full(shape, fill, dtype=float)
    elif tm.interpretation == Interpretation.CATEGORICAL:
        tensor = np.zeros(shape, dtype=float)
    else:
        raise NotImplementedError(
            f"unsupported interpretation for ecg tmaps: {tm.interpretation}",
        )

    for i, ecg_date in enumerate(ecg_dates):
        hd5_path = make_hd5_path(tm, ecg_date, key)
        try:
            value = data[hd5_path][()]
            if tm.interpretation != Interpretation.CATEGORICAL:
                tensor[i] = value
                continue

            label = f"{channel_prefix}{value}".lower()
            # First case-insensitive match, otherwise the catch-all channel.
            channel = next(
                (name for name in tm.channel_map if label == name.lower()),
                channel_unknown,
            )
            channel_idx = tm.channel_map[channel]
            target = (i, channel_idx) if dynamic else (channel_idx,)
            tensor[target] = 1.0
        except (KeyError, ValueError):
            logging.debug(
                f"Could not obtain tensor {tm.name} from ECG on {ecg_date} in"
                f" {data.id}",
            )

    if tm.interpretation == Interpretation.LANGUAGE:
        tensor = tensor.astype(str)
    return tensor
def voltage_stat(tm, data):
    """Compute summary statistics over all leads of each ECG.

    The voltages of every lead in ``ECG_REST_LEADS_ALL`` are stacked into
    one array, and its mean, standard deviation, minimum, maximum and
    median are written to the channels named ``"mean"``, ``"std"``,
    ``"min"``, ``"max"`` and ``"median"`` in ``tm.channel_map``.

    Args:
        tm: Tensor map exposing ``time_series_filter`` and ``channel_map``.
        data: Mapping from HD5 paths to voltage datasets; also exposes
            ``id`` for log messages.

    Returns:
        ``float32`` array with the shape reported by ``is_dynamic_shape``.
        On ``KeyError`` a warning is logged and the statistics not yet
        written for that ECG stay 0.
    """
    # Kept in the original assignment order so a KeyError part-way through
    # leaves the same statistics filled in.
    statistics = (
        ("mean", np.mean),
        ("std", np.std),
        ("min", np.min),
        ("max", np.max),
        ("median", np.median),
    )

    ecg_dates = tm.time_series_filter(data)
    dynamic, shape = is_dynamic_shape(tm, len(ecg_dates))
    tensor = np.zeros(shape, dtype=np.float32)

    for i, ecg_date in enumerate(ecg_dates):
        try:
            voltages = np.array(
                [
                    data[make_hd5_path(tm, ecg_date, lead)][()]
                    for lead in ECG_REST_LEADS_ALL
                ]
            )
            for stat_name, stat_fn in statistics:
                stat_value = stat_fn(voltages)
                stat_idx = tm.channel_map[stat_name]
                target = (i, stat_idx) if dynamic else (stat_idx,)
                tensor[target] = stat_value
        except KeyError:
            logging.warning(
                f"Could not get voltage stats for ECG at {data.id}")
    return tensor
def ecg_bmi(tm, data):
    """Compute body-mass index per ECG from stored weight and height.

    ``"weightlbs"`` and ``"heightin"`` are read as floats, checked against
    ``MIN_/MAX_WEIGHT_LBS`` and ``MIN_/MAX_HEIGHT_IN``, scaled by 0.454 and
    0.0254 respectively, and combined as ``weight / height**2``. The
    ``dynamic`` flag from ``is_dynamic_shape`` is not consulted.

    Args:
        tm: Tensor map exposing ``time_series_filter``.
        data: Mapping from HD5 paths to datasets.

    Returns:
        Float array with the shape reported by ``is_dynamic_shape``.
        Entries stay 0 when a value is missing, unparsable, out of range,
        or the height is zero.
    """
    ecg_dates = tm.time_series_filter(data)
    _, shape = is_dynamic_shape(tm, len(ecg_dates))
    bmi_values = np.zeros(shape, dtype=float)

    for position, ecg_date in enumerate(ecg_dates):
        try:
            pounds = float(data[make_hd5_path(tm, ecg_date, "weightlbs")][()])
            inches = float(data[make_hd5_path(tm, ecg_date, "heightin")][()])

            height_invalid = inches < MIN_HEIGHT_IN or inches > MAX_HEIGHT_IN
            weight_invalid = pounds < MIN_WEIGHT_LBS or pounds > MAX_WEIGHT_LBS
            if height_invalid or weight_invalid:
                raise ValueError("Height/Weight outside valid range")

            kilograms = 0.454 * pounds
            metres = 0.0254 * inches
            bmi_values[position] = kilograms / (metres**2)
        except (KeyError, ZeroDivisionError, ValueError):
            # Unusable measurements: keep the zero default for this ECG.
            continue
    return bmi_values
def get_ecg_age_from_hd5(tm, data):
    """Compute the patient's age at each ECG.

    The age is the difference between ``"acquisitiondate"`` and
    ``"dateofbirth"`` (both parsed with ``_ecg_str2date``) in days, divided
    by ``YEAR_DAYS``. If either entry is missing, the stored
    ``"patientage"`` value is used instead. At most ``shape[0]`` ECGs are
    processed.

    Args:
        tm: Tensor map exposing ``time_series_filter``.
        data: Mapping from HD5 paths to datasets; also exposes ``id`` for
            log messages.

    Returns:
        Float array with the shape reported by ``is_dynamic_shape``.
        Entries stay -1 when neither source of age is available.
    """
    ecg_dates = tm.time_series_filter(data)
    _, shape = is_dynamic_shape(tm, len(ecg_dates))
    ages = np.full(shape, fill_value=-1, dtype=float)

    for i, ecg_date in enumerate(ecg_dates):
        if i >= shape[0]:
            break

        # Preferred source: difference between acquisition and birth dates.
        try:
            born = data[make_hd5_path(tm, ecg_date, "dateofbirth")][()]
            acquired = data[make_hd5_path(tm, ecg_date, "acquisitiondate")][()]
            elapsed = _ecg_str2date(acquired) - _ecg_str2date(born)
            ages[i] = elapsed.days / YEAR_DAYS
            continue
        except KeyError:
            pass

        # Fallback: the age recorded with the ECG itself.
        try:
            ages[i] = data[make_hd5_path(tm, ecg_date, "patientage")][()]
        except KeyError:
            logging.debug(
                f"Could not get patient date of birth or age from ECG on {ecg_date}"
                f" in {data.id}",
            )
    return ages