def _cox_tensor_from_file(tm: TensorMap, hd5: h5py.File, dependents=None):
    """Build the (event indicator, follow-up days) target for a Cox hazard model.

    Reads ``error``, ``disease_dicts`` and ``incidence_only`` from the enclosing
    scope, which is not visible in this block.

    :param tm: TensorMap describing the output; ``tm.name`` is used in error messages.
    :param hd5: Open HD5 file; its filename is converted to the patient key.
    :param dependents: Unused.
    :return: float32 array. Slot 0 is 1 when the patient has an entry in
        ``disease_dicts['diagnosis_dates']`` and 0 otherwise; slot 1 is the number
        of days from ECG acquisition to the diagnosis date or, without a diagnosis,
        to the end of follow-up.
    :raises NotImplementedError: If the file yields more than one ECG date.
    :raises KeyError: If the patient key is missing from ``disease_dicts['follow_up_start']``.
    :raises ValueError: If the ECG predates follow-up start, or if ``incidence_only``
        is set and the diagnosis is on or before the ECG acquisition date.
    """
    # NOTE(review): `error` is presumably an exception captured while the lookup
    # tables were being built - confirm against the enclosing factory.
    if error:
        raise error

    ecg_dates = _get_ecg_dates(tm, hd5)
    if len(ecg_dates) > 1:
        raise NotImplementedError('Cox hazard models for multiple ECGs are not implemented.')
    dynamic, shape = _is_dynamic_shape(tm, len(ecg_dates))
    # Allocate with the shape resolved by _is_dynamic_shape (as the sibling tensor
    # builders do) rather than tm.shape, so allocation agrees with the
    # `(i, k) if dynamic else k` indexing used below.
    tensor = np.zeros(shape, dtype=np.float32)
    for i, ecg_date in enumerate(ecg_dates):
        patient_key_from_ecg = _hd5_filename_to_mrn_int(hd5.filename)
        if patient_key_from_ecg not in disease_dicts['follow_up_start']:
            raise KeyError(f'{tm.name} mrn not in incidence csv')

        path = _make_hd5_path(tm, ecg_date, 'acquisitiondate')
        assess_date = _partners_str2date(
            decompress_data(data_compressed=hd5[path][()], dtype=hd5[path].attrs['dtype']),
        )
        follow_up_start = disease_dicts['follow_up_start'][patient_key_from_ecg]
        if assess_date < follow_up_start:
            raise ValueError('Assessed earlier than enrollment.')

        if patient_key_from_ecg not in disease_dicts['diagnosis_dates']:
            # No diagnosis on record: censor at the end of follow-up.
            has_disease = 0
            censor_date = follow_up_start + datetime.timedelta(
                days=YEAR_DAYS * disease_dicts['follow_up_total'][patient_key_from_ecg],
            )
        else:
            has_disease = 1
            censor_date = disease_dicts['diagnosis_dates'][patient_key_from_ecg]

        # A diagnosis on or before the ECG is a prevalent case.
        if incidence_only and censor_date <= assess_date and has_disease:
            raise ValueError(f'{tm.name} only considers incident diagnoses')

        tensor[(i, 0) if dynamic else 0] = has_disease
        tensor[(i, 1) if dynamic else 1] = (censor_date - assess_date).days
    return tensor
def measurement_matrix_from_file(tm: TensorMap, hd5: h5py.File, dependents: Dict = None):
    """Extract one measurement-matrix entry per ECG in the file.

    Reads ``key_idx`` and ``lead_idx`` from the enclosing scope (not visible in
    this block); they select which entry of the matrix is returned.

    :param tm: TensorMap describing the output tensor.
    :param hd5: Open HD5 file holding compressed 'measurementmatrix' datasets.
    :param dependents: Unused. Defaults to ``None`` instead of a shared mutable dict.
    :return: Float array of the shape resolved by ``_is_dynamic_shape``, with
        position ``i`` holding the selected entry for the i-th ECG date.
    """
    ecg_dates = _get_ecg_dates(tm, hd5)
    _, shape = _is_dynamic_shape(tm, len(ecg_dates))
    tensor = np.zeros(shape, dtype=float)
    for i, ecg_date in enumerate(ecg_dates):
        path = _make_hd5_path(tm, ecg_date, 'measurementmatrix')
        matrix = decompress_data(data_compressed=hd5[path][()], dtype=hd5[path].attrs['dtype'])
        tensor[i] = _get_measurement_matrix_entry(matrix, key_idx, lead_idx)
    return tensor
def _ecg_tensor_from_date(tm: TensorMap, hd5: h5py.File, ecg_date: str, population_normalize: int = None):
    """Assemble the voltage tensor of a single ECG, one channel per lead.

    :param tm: TensorMap supplying the output shape and the lead -> channel map.
    :param hd5: Open HD5 file holding one compressed voltage dataset per lead.
    :param ecg_date: Date key identifying which ECG in the file to read.
    :param population_normalize: Optional divisor applied to the whole tensor.
    :return: float32 array of shape ``tm.shape``; each lead is resampled to
        ``tm.shape[0]`` samples and written into its channel on the last axis.
    """
    ecg = np.zeros(tm.shape, dtype=np.float32)
    for lead, channel in tm.channel_map.items():
        lead_path = _make_hd5_path(tm, ecg_date, lead)
        raw_voltage = decompress_data(
            data_compressed=hd5[lead_path][()],
            dtype=hd5[lead_path].attrs['dtype'],
        )
        ecg[..., channel] = _resample_voltage(raw_voltage, tm.shape[0])

    if population_normalize is None:
        return ecg
    # Divide in place so the array stays float32.
    ecg /= population_normalize
    return ecg
def tensor_from_file(tm: TensorMap, hd5: h5py.File, dependents=None):
    """One-hot encode a patient's diagnosis status relative to the ECG date.

    Reads ``error``, ``patient_table``, ``birth_table``, ``date_table``,
    ``check_birthday`` and ``incidence_only`` from the enclosing scope, which is
    not visible in this block.

    Index set to 1.0 in the output:
      * 0 - the patient has no entry in ``date_table``.
      * 1 - with ``incidence_only``: diagnosis on or after the ECG;
            otherwise: diagnosis before the ECG.
      * 2 - without ``incidence_only``: diagnosis on or after the ECG.

    :param tm: TensorMap describing the output; ``tm.name`` is used in error messages.
    :param hd5: Open HD5 file; its filename is converted to the patient MRN.
    :param dependents: Unused.
    :return: float32 one-hot array of the shape resolved by ``_is_dynamic_shape``.
    :raises NotImplementedError: If the file yields more than one ECG date.
    :raises KeyError: If the MRN is missing from ``patient_table``.
    :raises ValueError: If the birth dates disagree (when ``check_birthday``), the
        ECG predates the ``patient_table`` date, or ``incidence_only`` is set and
        the diagnosis precedes the ECG.
    """
    # NOTE(review): `error` is presumably an exception captured while the lookup
    # tables were being built - confirm against the enclosing factory.
    if error:
        raise error

    ecg_dates = _get_ecg_dates(tm, hd5)
    if len(ecg_dates) > 1:
        raise NotImplementedError('Diagnosis models for multiple ECGs are not implemented.')
    dynamic, shape = _is_dynamic_shape(tm, len(ecg_dates))
    categorical_data = np.zeros(shape, dtype=np.float32)
    for i, ecg_date in enumerate(ecg_dates):
        mrn = _hd5_filename_to_mrn_int(hd5.filename)
        mrn_int = int(mrn)
        if mrn_int not in patient_table:
            raise KeyError(f'{tm.name} mrn not in incidence csv')

        if check_birthday:
            birth_path = _make_hd5_path(tm, ecg_date, 'dateofbirth')
            birth_date = _partners_str2date(
                decompress_data(data_compressed=hd5[birth_path][()], dtype=hd5[birth_path].attrs['dtype']),
            )
            if birth_date != birth_table[mrn_int]:
                # Report the CSV birth date of *this* patient (keyed by mrn_int).
                raise ValueError(f'Birth dates do not match! CSV had {birth_table[mrn_int]} but HD5 has {birth_date}')

        acquisition_path = _make_hd5_path(tm, ecg_date, 'acquisitiondate')
        assess_date = _partners_str2date(
            decompress_data(data_compressed=hd5[acquisition_path][()], dtype=hd5[acquisition_path].attrs['dtype']),
        )
        if assess_date < patient_table[mrn_int]:
            raise ValueError(f'{tm.name} Assessed earlier than enrollment')

        if mrn_int not in date_table:
            index = 0
        else:
            disease_date = date_table[mrn_int]

            if incidence_only and disease_date < assess_date:
                raise ValueError(f'{tm.name} is skipping prevalent cases.')
            elif incidence_only and disease_date >= assess_date:
                index = 1
            else:
                index = 1 if disease_date < assess_date else 2
            logging.debug(f'mrn: {mrn_int} Got disease_date: {disease_date} assess {assess_date} index {index}.')

        slices = (i, index) if dynamic else (index,)
        categorical_data[slices] = 1.0
    return categorical_data
def ecg_lvh_from_file(tm: TensorMap, hd5: h5py.File, dependents=None):
    """One-hot encode whether each ECG meets a voltage criterion for LVH.

    The criterion is chosen by the first of these substrings found in ``tm.name``:
      * ``avl_lvh``: 'ramp' of aVL > 1100.
      * ``sokolow_lyon_lvh``: 'samp' of V1 + max('ramp' of V5, 'ramp' of V6) > 3500.
      * ``cornell_lvh``: 'ramp' of aVL + 'samp' of V3 > 2000 when the gender
        field contains 'female', otherwise > 2800.

    NOTE(review): 'samp'/'ramp' are presumably S- and R-wave amplitudes and the
    thresholds presumably microvolts - confirm against the measurement matrix spec.

    :param tm: TensorMap describing the output; ``tm.name`` selects the criterion.
    :param hd5: Open HD5 file holding 'measurementmatrix' and 'gender' datasets.
    :param dependents: Unused. Defaults to ``None`` instead of a shared mutable dict.
    :return: Float array of the shape resolved by ``_is_dynamic_shape`` with 1.0 at
        index 1 when the criterion is met and at index 0 otherwise.
    :raises ValueError: If ``tm.name`` matches none of the known criteria.
    """
    # Lead order seems constant and standard throughout, but we could eventually tensorize it from XML
    avl_min = 1100.0
    sl_min = 3500.0
    cornell_female_min = 2000.0
    cornell_male_min = 2800.0
    sleads = ['V1', 'V3']
    rleads = ['aVL', 'V5', 'V6']
    ecg_dates = _get_ecg_dates(tm, hd5)
    dynamic, shape = _is_dynamic_shape(tm, len(ecg_dates))
    tensor = np.zeros(shape, dtype=float)

    for i, ecg_date in enumerate(ecg_dates):
        path = _make_hd5_path(tm, ecg_date, 'measurementmatrix')
        matrix = decompress_data(data_compressed=hd5[path][()], dtype=hd5[path].attrs['dtype'])
        criteria_sleads = {
            lead: _get_measurement_matrix_entry(
                matrix, measurement_matrix_lead_measures['samp'], measurement_matrix_leads[lead],
            )
            for lead in sleads
        }
        criteria_rleads = {
            lead: _get_measurement_matrix_entry(
                matrix, measurement_matrix_lead_measures['ramp'], measurement_matrix_leads[lead],
            )
            for lead in rleads
        }
        # The gender field is read for every criterion, although only the Cornell
        # criterion uses it.
        sex_path = _make_hd5_path(tm, ecg_date, 'gender')
        is_female = 'female' in decompress_data(data_compressed=hd5[sex_path][()], dtype=hd5[sex_path].attrs['dtype'])

        if 'avl_lvh' in tm.name:
            is_lvh = criteria_rleads['aVL'] > avl_min
        elif 'sokolow_lyon_lvh' in tm.name:
            is_lvh = criteria_sleads['V1'] + np.maximum(criteria_rleads['V5'], criteria_rleads['V6']) > sl_min
        elif 'cornell_lvh' in tm.name:
            cornell_voltage = criteria_rleads['aVL'] + criteria_sleads['V3']
            cornell_min = cornell_female_min if is_female else cornell_male_min
            is_lvh = cornell_voltage > cornell_min
        else:
            raise ValueError(f'{tm.name} criterion for LVH is not accounted for')

        # Following convention from categorical TMAPS, positive has cmap index 1
        index = 1 if is_lvh else 0
        slices = (i, index) if dynamic else (index,)
        tensor[slices] = 1.0
    return tensor
def tensor_from_file(tm: TensorMap, hd5: h5py.File, dependents=None):
    """Build a discretized survival-curve target: survival flags, then event flags.

    Reads ``error``, ``disease_dicts``, ``day_window`` and ``incidence_only`` from
    the enclosing scope, which is not visible in this block.

    The output's last axis is split in two halves of ``intervals`` slots each,
    covering ``day_window`` days after the ECG in equal steps:
      * first half: 1.0 while the start of the interval is before the censor date
        (the diagnosis date, or the end of follow-up when there is no diagnosis);
      * second half: 1.0 in the interval whose start falls within one interval
        length on or after the diagnosis date (only when a diagnosis exists). If
        the diagnosis is on or before the ECG, the first slot of this half is set.

    :param tm: TensorMap describing the output; ``tm.name`` is used in error messages.
    :param hd5: Open HD5 file; its filename is converted to the patient key.
    :param dependents: Unused.
    :return: float32 array of the shape resolved by ``_is_dynamic_shape``.
    :raises NotImplementedError: If the file yields more than one ECG date.
    :raises KeyError: If the patient key is missing from ``disease_dicts['follow_up_start']``.
    :raises ValueError: If the ECG predates follow-up start, or if ``incidence_only``
        is set and the diagnosis is on or before the ECG acquisition date.
    """
    # NOTE(review): `error` is presumably an exception captured while the lookup
    # tables were being built - confirm against the enclosing factory.
    if error:
        raise error

    ecg_dates = _get_ecg_dates(tm, hd5)
    if len(ecg_dates) > 1:
        raise NotImplementedError('Survival curve models for multiple ECGs are not implemented.')
    dynamic, shape = _is_dynamic_shape(tm, len(ecg_dates))
    survival_then_censor = np.zeros(shape, dtype=np.float32)
    for ed, ecg_date in enumerate(ecg_dates):
        patient_key_from_ecg = _hd5_filename_to_mrn_int(hd5.filename)
        if patient_key_from_ecg not in disease_dicts['follow_up_start']:
            raise KeyError(f'{tm.name} mrn not in incidence csv')

        path = _make_hd5_path(tm, ecg_date, 'acquisitiondate')
        assess_date = _partners_str2date(
            decompress_data(data_compressed=hd5[path][()], dtype=hd5[path].attrs['dtype']),
        )
        follow_up_start = disease_dicts['follow_up_start'][patient_key_from_ecg]
        if assess_date < follow_up_start:
            raise ValueError('Assessed earlier than enrollment.')

        if patient_key_from_ecg not in disease_dicts['diagnosis_dates']:
            # No diagnosis on record: censor at the end of follow-up.
            has_disease = 0
            censor_date = follow_up_start + datetime.timedelta(
                days=YEAR_DAYS * disease_dicts['follow_up_total'][patient_key_from_ecg],
            )
        else:
            has_disease = 1
            censor_date = disease_dicts['diagnosis_dates'][patient_key_from_ecg]

        # Halve the per-ECG width in BOTH the dynamic and the static case. The
        # conditional expression binds looser than `/`, so it must be parenthesized.
        intervals = (shape[1] if dynamic else shape[0]) // 2
        days_per_interval = day_window / intervals
        interval_length = datetime.timedelta(days=days_per_interval)

        # Writable view of this ECG's row (the whole tensor when not dynamic).
        row = survival_then_censor[ed] if dynamic else survival_then_censor

        # Iterate exactly `intervals` steps; a float-stepped np.arange can emit one
        # extra element through rounding, which would index past the tensor.
        for i in range(intervals):
            cur_date = assess_date + datetime.timedelta(days=i * days_per_interval)
            row[i] = float(cur_date < censor_date)
            row[intervals + i] = has_disease * float(censor_date <= cur_date < censor_date + interval_length)
            if i == 0 and censor_date <= cur_date:  # Handle prevalent diseases
                row[intervals] = has_disease
                if has_disease and incidence_only:
                    raise ValueError(f'{tm.name} is skipping prevalent cases.')

        logging.debug(
            f"Got survival disease {has_disease}, censor: {censor_date}, assess {assess_date}, fu start {disease_dicts['follow_up_start'][patient_key_from_ecg]} "
            f"fu total {disease_dicts['follow_up_total'][patient_key_from_ecg]} tensor:{row[:4]} mid tense: {row[intervals:intervals+4]} ",
        )
    return survival_then_censor