Example #1
    def _prevalent_incident_tensor_from_file(
        tm: TensorMap,
        hd5: h5py.File,
        dependents=None,
    ):
        index = 0
        categorical_data = np.zeros(tm.shape, dtype=np.float32)
        if tm.hd5_key_guess() in hd5:
            data = tm.hd5_first_dataset_in_group(hd5, tm.hd5_key_guess())
            if tm.storage_type == StorageType.CATEGORICAL_INDEX or tm.storage_type == StorageType.CATEGORICAL_FLAG:
                index = int(data[0])
                categorical_data[index] = 1.0
            else:
                categorical_data = np.array(data)
        elif tm.storage_type == StorageType.CATEGORICAL_FLAG:
            categorical_data[index] = 1.0
        else:
            raise ValueError(
                f"No HD5 Key at prefix {tm.path_prefix} found for tensor map: {tm.name}.",
            )

        if index != 0:
            if event_date_key in hd5 and start_date_key in hd5:
                disease_date = str2date(str(hd5[event_date_key][0]))
                assess_date = str2date(str(hd5[start_date_key][0]))
            else:
                raise ValueError(f"No date found for tensor map: {tm.name}.")
            index = 1 if disease_date < assess_date else 2
        categorical_data[index] = 1.0
        return categorical_data
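
The index remapping above encodes a three-way split: channel 0 for no disease, channel 1 for prevalent disease (event before assessment), and channel 2 for incident disease (event after assessment). A minimal sketch of how such a tensor_from_file could be wired into a categorical TensorMap; the map and channel names here are illustrative, not taken from the source:

prevalent_incident_af = TensorMap(
    'prevalent_incident_af',  # hypothetical name
    Interpretation.CATEGORICAL,
    shape=(3,),
    channel_map={'no_af': 0, 'prevalent_af': 1, 'incident_af': 2},
    tensor_from_file=_prevalent_incident_tensor_from_file,
)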
Example #2
 def _handle_tm(self, tm: TensorMap, is_input: bool,
                   path: Path) -> h5py.File:
        name = tm.input_name() if is_input else tm.output_name()
        batch = self.in_batch if is_input else self.out_batch
        idx = self.stats['batch_index']

        if tm in self.dependents:
            batch[name][idx] = self.dependents[tm]
            if tm.cacheable:
                self.cache[path, name] = self.dependents[tm]
            self._collect_stats(tm, self.dependents[tm])
            return self.hd5
        if (path, name) in self.cache:
            batch[name][idx] = self.cache[path, name]
            return self.hd5
        if self.hd5 is None:  # Don't open hd5 if everything is in the self.cache
            self.hd5 = h5py.File(path, 'r')
        tensor = tm.postprocess_tensor(tm.tensor_from_file(
            tm, self.hd5, self.dependents),
                                       augment=self.augment,
                                       hd5=self.hd5)
        slices = tuple(
            slice(min(tm.static_shape()[i], tensor.shape[i]))
            for i in range(len(tensor.shape)))
        batch[name][(idx, ) + slices] = tensor[slices]
        if tm.cacheable:
            self.cache[path, name] = batch[name][idx]
        self._collect_stats(tm, tensor)
        return self.hd5
Example #3
def make_test_tensor_maps(desired_map_name: str) -> TensorMap:
    for n in range(1, 6):
        if desired_map_name == f'{n}d_cont':
            return TensorMap(f'{n}d_cont',
                             shape=tuple(range(2, n + 2)),
                             interpretation=Interpretation.CONTINUOUS)
        if desired_map_name == f'{n}d_cat':
            return TensorMap(
                f'{n}d_cat',
                shape=tuple(range(2, n + 2)),
                interpretation=Interpretation.CATEGORICAL,
                channel_map={f'c_{i}': i
                             for i in range(n + 1)},
            )
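
A quick check of the naming scheme, assuming the TensorMap constructor above: '3d_cont' yields a continuous map of shape (2, 3, 4), while '2d_cat' yields a categorical map with three channels:

tm = make_test_tensor_maps('3d_cont')
assert tm.shape == (2, 3, 4)
tm = make_test_tensor_maps('2d_cat')
assert sorted(tm.channel_map) == ['c_0', 'c_1', 'c_2']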
Example #4
def build_cardiac_surgery_tensor_maps(
    needed_name: str,
) -> TensorMap:
    outcome2column = {
        "sts_death": "mtopd",
        "sts_stroke": "cnstrokp",
        "sts_renal_failure": "crenfail",
        "sts_prolonged_ventilation": "cpvntlng",
        "sts_dsw_infection": "deepsterninf",
        "sts_reoperation": "reop",
        "sts_any_morbidity": "anymorbidity",
        "sts_long_stay": "llos",
    }

    cardiac_surgery_dict = None
    date_interval_lookup = None
    sts_tmap = None
    if needed_name in outcome2column:
        if cardiac_surgery_dict is None:
            cardiac_surgery_dict = build_cardiac_surgery_dict(
                additional_columns=[outcome2column[needed_name]],
            )
        channel_map = _outcome_channels(needed_name)
        sts_tmap = TensorMap(
            needed_name,
            Interpretation.CATEGORICAL,
            path_prefix=PARTNERS_PREFIX,
            tensor_from_file=make_cardiac_surgery_outcome_tensor_from_file(
                cardiac_surgery_dict, outcome2column[needed_name],
            ),
            channel_map=channel_map,
            validator=validator_not_all_zero,
        )
    elif needed_name.endswith('_sts'):
        base_name = needed_name.split('_sts')[0]
        tmap_map = build_partners_time_series_tensor_maps([base_name])

        if cardiac_surgery_dict is None:
            # Only surgery dates are needed here to build the interval lookup.
            cardiac_surgery_dict = build_cardiac_surgery_dict()
        if date_interval_lookup is None:
            date_interval_lookup = build_date_interval_lookup(cardiac_surgery_dict)
        sts_tmap = copy.deepcopy(tmap_map[base_name])
        sts_tmap.name = needed_name
        sts_tmap.time_series_lookup = date_interval_lookup

    return sts_tmap
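
A hedged sketch of the dispatch, assuming the helper factories above resolve: names in outcome2column return categorical outcome maps, while names ending in '_sts' return a deep copy of an existing time-series map restricted to the surgery date intervals:

death_tmap = build_cardiac_surgery_tensor_maps('sts_death')        # categorical outcome map
windowed_tmap = build_cardiac_surgery_tensor_maps('ecg_2500_sts')  # hypothetical '_sts' name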
Example #5
def make_mgb_ecg_measurement_matrix_global_tensor_maps(needed_name: str):
    # Measurement matrix TMAPS -- indices from MUSE XML dev manual, page 49 and following
    measurement_matrix_global_measures = {
        'pon': 1,       # P-wave onset in median beat (in samples)
        'poff': 2,      # P-wave offset in median beat
        'qon': 3,       # Q-Onset in median beat
        'qoff': 4,      # Q-Offset in median beat
        'ton': 5,       # T-Onset in median beat
        'toff': 6,      # T-Offset in median beat
        'nqrs': 7,      # Number of QRS Complexes
        'qrsdur': 8,    # QRS Duration
        'qt': 9,        # QT Interval
        'qtc': 10,      # QT Corrected
        'print': 11,    # PR Interval
        'vrate': 12,    # Ventricular Rate
        'avgrr': 13,    # Average R-R Interval
    }
    for measure, measure_idx in measurement_matrix_global_measures.items():
        if f'partners_ecg_measurement_matrix_{measure}' == needed_name:
            return TensorMap(
                f'partners_ecg_measurement_matrix_{measure}',
                interpretation=Interpretation.CONTINUOUS,
                shape=(None, 1),
                path_prefix=PARTNERS_PREFIX,
                loss='logcosh',
                time_series_limit=0,
                tensor_from_file=make_measurement_matrix_from_file(measure_idx),
            )
Example #6
def make_waveform_maps(desired_map_name: str) -> TensorMap:
    """Creates 12 possible Tensor Maps and returns the desired one or None:

        partners_ecg_2500      partners_ecg_2500_exact      partners_ecg_5000      partners_ecg_5000_exact
        partners_ecg_2500_std  partners_ecg_2500_std_exact  partners_ecg_5000_std  partners_ecg_5000_std_exact
        partners_ecg_2500_raw  partners_ecg_2500_raw_exact  partners_ecg_5000_raw  partners_ecg_5000_raw_exact

        default normalizes with ZeroMeanStd1 and resamples
        _std normalizes with Standardize mean = 0, std = 2000
        _raw does not normalize
        _exact does not resample
    :param desired_map_name: The name of the desired TensorMap
    :return: The desired TensorMap
    """
    length_options = [2500, 5000]
    exact_options = [True, False]
    normalize_options = [ZeroMeanStd1(), Standardize(mean=0, std=2000), None]
    for length, exact_length, normalization in product(length_options, exact_options, normalize_options):
        norm = '' if isinstance(normalization, ZeroMeanStd1) else '_std' if isinstance(normalization, Standardize) else '_raw'
        exact = '_exact' if exact_length else ''
        name = f'partners_ecg_{length}{norm}{exact}'
        if name == desired_map_name:
            return TensorMap(
                name,
                shape=(None, length, 12),
                path_prefix=PARTNERS_PREFIX,
                tensor_from_file=make_voltage(exact_length),
                normalization=normalization,
                channel_map=ECG_REST_AMP_LEADS,
                time_series_limit=0,
                validator=validator_not_all_zero,
            )
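
The twelve names are the cross product of {2500, 5000} × {default, _std, _raw} × {resampled, _exact}. A hedged usage sketch, assuming the factory above is importable:

tmap = make_waveform_maps('partners_ecg_5000_std_exact')
assert tmap.shape == (None, 5000, 12)  # dynamic first axis because time_series_limit=0
assert make_waveform_maps('not_a_waveform_map') is None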
Example #7
 def test_explore(self, default_arguments, tmpdir_factory):
     temp_dir = tmpdir_factory.mktemp('explore_tensors')
     default_arguments.tensors = str(temp_dir)
     tmaps = TMAPS_UP_TO_4D[:]
     tmaps.append(
          TensorMap('scalar',
                   shape=(1, ),
                   interpretation=Interpretation.CONTINUOUS))
     explore_expected = build_hdf5s(temp_dir, tmaps, n=pytest.N_TENSORS)
     default_arguments.num_workers = 3
     default_arguments.tensor_maps_in = tmaps
     explore(default_arguments)
     csv_path = os.path.join(default_arguments.output_folder,
                             default_arguments.id, 'tensors_all_union.csv')
     explore_result = pd.read_csv(csv_path)
     for row in explore_result.iterrows():
         row = row[1]
         for tm in tmaps:
             row_expected = explore_expected[(row['fpath'], tm)]
             if _should_error_detect(tm):
                 actual = getattr(row, _continuous_explore_header(tm))
                 assert not np.isnan(actual)
                 continue
             if tm.is_continuous():
                 actual = getattr(row, _continuous_explore_header(tm))
                 assert actual == row_expected
                 continue
             if tm.is_categorical():
                 for channel, idx in tm.channel_map.items():
                     channel_val = getattr(
                         row, _categorical_explore_header(tm, channel))
                     assert channel_val == row_expected[idx]
Example #8
def make_lead_maps(desired_map_name: str) -> TensorMap:
    for lead in ECG_REST_AMP_LEADS:
        tensormap_name = f'lead_{lead}_len'
        if desired_map_name == tensormap_name:
            return TensorMap(
                tensormap_name, interpretation=Interpretation.CATEGORICAL, path_prefix=PARTNERS_PREFIX,
                channel_map={'_2500': 0, '_5000': 1, 'other': 2}, time_series_limit=0, validator=validator_not_all_zero,
                tensor_from_file=make_voltage_len_categorical_tmap(lead=lead),
            )
Example #9
def build_tensor_maps(
    data_descriptions: List[DataDescription],
) -> List[TensorMap]:
    tmaps = []
    for name, shape, storage_type in data_descriptions:
        tmaps.append(
            TensorMap(
                name,
                interpretation=STORAGE_TYPE_TO_INTERPRETATION[storage_type],
                shape=shape,
            ),
        )
    return tmaps
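
A hedged usage sketch: each data description unpacks as (name, shape, storage_type), so a list of such tuples yields one TensorMap apiece. The entries and the STORAGE_TYPE_TO_INTERPRETATION mapping are assumed from the surrounding module:

descriptions = [
    ('age', (1,), StorageType.CONTINUOUS),          # hypothetical entries
    ('sex', (2,), StorageType.CATEGORICAL_INDEX),
]
tmaps = build_tensor_maps(descriptions)
assert [tm.name for tm in tmaps] == ['age', 'sex']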
Example #10
 def tensor_from_file(tm: TensorMap, hd5: h5py.File, dependents=None):
     if error:
         raise error
     if normalization:
         tm.normalization = {'mean': mean, 'std': std}
     try:
         return table[os.path.basename(hd5.filename).replace(
             '.hd5',
             '',
         )].copy()
     except KeyError:
         raise KeyError(f'User id not in file {file_name}.')
Example #11
def make_mgb_ecg_measurement_matrix_lead_tensor_maps(needed_name: str):
    for lead, lead_idx in measurement_matrix_leads.items():
        for measure, measure_idx in measurement_matrix_lead_measures.items():
            if f'partners_ecg_measurement_matrix_{lead}_{measure}' == needed_name:
                return TensorMap(
                    f'partners_ecg_measurement_matrix_{lead}_{measure}',
                    interpretation=Interpretation.CONTINUOUS,
                    shape=(None, 1),
                    path_prefix=PARTNERS_PREFIX,
                    loss='logcosh',
                    time_series_limit=0,
                    tensor_from_file=make_measurement_matrix_from_file(measure_idx, lead_idx=lead_idx),
                )
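
A hedged sketch of the naming convention shared by the per-lead factory above and the global factory of Example #5; the lead and measure keys are assumed to exist in the measurement_matrix_* dictionaries:

qtc_tmap = make_mgb_ecg_measurement_matrix_global_tensor_maps(
    'partners_ecg_measurement_matrix_qtc',
)
v5_ramp_tmap = make_mgb_ecg_measurement_matrix_lead_tensor_maps(
    'partners_ecg_measurement_matrix_V5_ramp',  # assumes 'V5' and 'ramp' are valid keys
)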
Example #12
 def __init__(
         self,
         tensor_map: TensorMap,
         activation: str,
         parents: List[TensorMap] = None,
         **kwargs,
 ):
     self.tensor_map = tensor_map
     if not self.can_apply():
         return
     self.parents = parents
     self.activation = _activation_layer(activation)
     self.dense = Dense(units=tensor_map.shape[0], name=tensor_map.output_name(), activation=tensor_map.activation)
     self.units = tensor_map.annotation_units
Example #13
def generate_continuous_tensor_map_from_file(
    file_name: str,
    column_name,
    tensor_map_name: str,
    normalization: bool,
    discretization_bounds: List[float],
) -> TensorMap:
    if discretization_bounds:
        return TensorMap(
            tensor_map_name,
            Interpretation.DISCRETIZED,
            channel_map={tensor_map_name: 0},
            tensor_from_file=build_tensor_from_file(file_name, column_name,
                                                    normalization),
            discretization_bounds=discretization_bounds,
        )
    else:
        return TensorMap(
            tensor_map_name,
            channel_map={tensor_map_name: 0},
            tensor_from_file=build_tensor_from_file(file_name, column_name,
                                                    normalization),
        )
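
A hedged usage sketch: with discretization bounds the value is binned into a DISCRETIZED map, and without them the default continuous interpretation applies. The file and column names below are hypothetical:

binned = generate_continuous_tensor_map_from_file(
    'labs.csv', 'ldl', 'ldl_binned', normalization=False,
    discretization_bounds=[100.0, 130.0, 160.0],
)
raw = generate_continuous_tensor_map_from_file(
    'labs.csv', 'ldl', 'ldl_raw', normalization=False,
    discretization_bounds=[],
)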
Example #14
def make_mgb_ecg_lvh_tensormaps(needed_name: str):
    def ecg_lvh_from_file(tm: TensorMap, hd5: h5py.File, dependents={}):
        # Lead order seems constant and standard throughout, but we could eventually tensorize it from XML
        avl_min = 1100.0
        sl_min = 3500.0
        cornell_female_min = 2000.0
        cornell_male_min = 2800.0
        sleads = ['V1', 'V3']
        rleads = ['aVL', 'V5', 'V6']
        ecg_dates = _get_ecg_dates(tm, hd5)
        dynamic, shape = _is_dynamic_shape(tm, len(ecg_dates))
        tensor = np.zeros(shape, dtype=float)

        for i, ecg_date in enumerate(ecg_dates):
            path = _make_hd5_path(tm, ecg_date, 'measurementmatrix')
            matrix = decompress_data(data_compressed=hd5[path][()], dtype=hd5[path].attrs['dtype'])
            criteria_sleads = {lead: _get_measurement_matrix_entry(matrix, measurement_matrix_lead_measures['samp'], measurement_matrix_leads[lead]) for lead in sleads}
            criteria_rleads = {lead: _get_measurement_matrix_entry(matrix, measurement_matrix_lead_measures['ramp'], measurement_matrix_leads[lead]) for lead in rleads}
            sex_path = _make_hd5_path(tm, ecg_date, 'gender')
            is_female = 'female' in decompress_data(data_compressed=hd5[sex_path][()], dtype=hd5[sex_path].attrs['dtype'])
            if 'avl_lvh' in tm.name:
                is_lvh = criteria_rleads['aVL'] > avl_min
            elif 'sokolow_lyon_lvh' in tm.name:
                is_lvh = criteria_sleads['V1'] + np.maximum(criteria_rleads['V5'], criteria_rleads['V6']) > sl_min
            elif 'cornell_lvh' in tm.name:
                is_lvh = criteria_rleads['aVL'] + criteria_sleads['V3']
                if is_female:
                    is_lvh = is_lvh > cornell_female_min
                else:
                    is_lvh = is_lvh > cornell_male_min
            else:
                raise ValueError(f'{tm.name} criterion for LVH is not accounted for')
            # Following convention from categorical TMAPS, positive has cmap index 1
            index = 1 if is_lvh else 0
            slices = (i, index) if dynamic else (index,)
            tensor[slices] = 1.0
        return tensor

    for criterion in ['avl_lvh', 'sokolow_lyon_lvh', 'cornell_lvh']:
        if f'partners_ecg_{criterion}' == needed_name:
            return TensorMap(
                f'partners_ecg_{criterion}',
                interpretation=Interpretation.CATEGORICAL,
                path_prefix=PARTNERS_PREFIX,
                tensor_from_file=ecg_lvh_from_file,
                channel_map={f'no_{criterion}': 0, criterion: 1},
                shape=(None, 2),
                time_series_limit=0,
            )
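
The three criteria reduce to threshold checks on median-beat amplitudes (units follow the measurement matrix): R(aVL) > 1100 for the aVL criterion, S(V1) + max(R(V5), R(V6)) > 3500 for Sokolow-Lyon, and R(aVL) + S(V3) over a sex-specific cutoff (2000 female, 2800 male) for Cornell. A worked check of the Sokolow-Lyon arithmetic with hypothetical amplitudes:

s_v1, r_v5, r_v6 = 1200.0, 2000.0, 2600.0  # hypothetical amplitudes
assert s_v1 + max(r_v5, r_v6) > 3500.0     # 1200 + 2600 = 3800, so LVH positive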
Example #15
def generate_random_text_tensor_maps(
        text_file: str,
        window_size: int,
        one_hot: bool = True) -> Tuple[TensorMap, TensorMap, TensorMap]:
    name = os.path.basename(text_file).split('.')[0]
    text, token_dictionary = token_dictionary_and_text_from_file(text_file)
    shape = (window_size,
             len(token_dictionary)) if one_hot else (window_size, )
    burn_in = TensorMap(
        f'next_{name}',
        Interpretation.LANGUAGE,
        shape=shape,
        channel_map=token_dictionary,
        cacheable=False,
    )
    output_map = TensorMap(
        f'next_next_{name}',
        Interpretation.LANGUAGE,
        shape=(len(token_dictionary), ) if one_hot else shape,
        loss='categorical_crossentropy',
        channel_map=token_dictionary,
        cacheable=False,
    )
    input_map = TensorMap(
        name,
        Interpretation.LANGUAGE,
        shape=shape,
        tensor_from_file=random_text_window_tensor(text,
                                                   window_size,
                                                   one_hot=one_hot),
        dependent_map=[burn_in, output_map],
        channel_map=token_dictionary,
        annotation_units=128,
        cacheable=False,
    )
    return input_map, burn_in, output_map
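
A hedged usage sketch: the factory returns the input window plus two dependent targets, so all three maps must be unpacked together (the text file below is hypothetical):

input_map, burn_in, output_map = generate_random_text_tensor_maps(
    'shakespeare.txt', window_size=32, one_hot=True,
)
assert output_map in input_map.dependent_map  # targets are filled as dependents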
Example #16
 def __init__(
         self,
         *,
         tensor_map: TensorMap,
         dense_layers: List[int],
         activation: str,
         dense_normalize: str,
         dense_regularize: str,
         dense_regularize_rate: float,
         **kwargs,
 ):
     self.tensor_map = tensor_map
     if not self.can_apply():
         return
     self.fully_connected = DenseBlock(
         widths=dense_layers,
         activation=activation,
         normalization=dense_normalize,
         regularization=dense_regularize,
         regularization_rate=dense_regularize_rate,
         name=tensor_map.embed_name(),
     )
Example #17
def make_wide_file_maps(desired_map_name: str) -> Union[TensorMap, None]:
    days_window = 1825

    if desired_map_name == 'sex_from_wide_csv':
        csv_tff = tensor_from_wide(WIDE_FILE, target='sex')
        return TensorMap(
            'sex_from_wide', Interpretation.CATEGORICAL, annotation_units=2, tensor_from_file=csv_tff,
            channel_map={'female': 0, 'male': 1},
        )
    elif desired_map_name == 'age_from_wide_csv':
        csv_tff = tensor_from_wide(WIDE_FILE, target='age')
        return TensorMap(
            'age_from_wide', Interpretation.CONTINUOUS, shape=(1,),
            tensor_from_file=csv_tff, channel_map={'age': 0},
            normalization={'mean': 63.35798891483556, 'std': 7.554638350423902},
        )
    elif desired_map_name == 'bmi_from_wide_csv':
        csv_tff = tensor_from_wide(WIDE_FILE, target='bmi')
        return TensorMap(
            'bmi_from_wide', Interpretation.CONTINUOUS, shape=(1,), channel_map={'bmi': 0},
            annotation_units=1, normalization={'mean': 27.3397, 'std': 4.77216}, tensor_from_file=csv_tff,
        )
    elif desired_map_name == 'ecg_2500_from_wide_csv':
        tff = tensor_from_wide(WIDE_FILE, target='ecg')
        return TensorMap(
            'ecg_rest_raw', shape=(2500, 12), path_prefix=PARTNERS_PREFIX, tensor_from_file=tff,
            cacheable=False, channel_map=ECG_REST_UKB_LEADS,
        )
    elif desired_map_name == 'ecg_5000_from_wide_csv':
        tff = tensor_from_wide(WIDE_FILE, target='ecg')
        return TensorMap(
            'ecg_rest_raw', shape=(5000, 12), path_prefix=PARTNERS_PREFIX, tensor_from_file=tff,
            cacheable=False, channel_map=ECG_REST_UKB_LEADS,
        )
    elif desired_map_name == 'time_to_hf_wide_csv':
        tff = tensor_from_wide(WIDE_FILE, target='time_to_event')
        return TensorMap('time_to_hf', Interpretation.TIME_TO_EVENT, tensor_from_file=tff)
    elif desired_map_name == 'survival_curve_hf_wide_csv':
        tff = tensor_from_wide(WIDE_FILE, target='survival_curve')
        return TensorMap('survival_curve_hf', Interpretation.SURVIVAL_CURVE, tensor_from_file=tff, shape=(50,), days_window=days_window)
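
A hedged sketch: the *_from_wide_csv names all read the same WIDE_FILE, so swapping the suffix swaps the supervised target:

age_tmap = make_wide_file_maps('age_from_wide_csv')   # continuous, normalized
hf_tmap = make_wide_file_maps('time_to_hf_wide_csv')  # Cox-style time to event
assert make_wide_file_maps('unknown_name') is None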
Example #18
def make_partners_diagnosis_maps(desired_map_name: str) -> Union[TensorMap, None]:
    diagnosis2column = {
        'atrial_fibrillation': 'first_af', 'blood_pressure_medication': 'first_bpmed',
        'coronary_artery_disease': 'first_cad', 'cardiovascular_disease': 'first_cvd',
        'death': 'death_date', 'diabetes_mellitus': 'first_dm', 'heart_failure': 'first_hf',
        'hypertension': 'first_htn', 'left_ventricular_hypertrophy': 'first_lvh',
        'myocardial_infarction': 'first_mi', 'pulmonary_artery_disease': 'first_pad',
        'stroke': 'first_stroke', 'valvular_disease': 'first_valvular_disease',
    }
    for diagnosis in diagnosis2column:
        # Build diagnosis classification TensorMaps
        name = f'diagnosis_{diagnosis}'
        if name == desired_map_name:
            tensor_from_file_fxn = build_incidence_tensor_from_file(INCIDENCE_CSV, diagnosis_column=diagnosis2column[diagnosis])
            return TensorMap(f'{name}_newest', Interpretation.CATEGORICAL, path_prefix=PARTNERS_PREFIX, channel_map=_diagnosis_channels(diagnosis), tensor_from_file=tensor_from_file_fxn)
        name = f'incident_diagnosis_{diagnosis}'
        if name == desired_map_name:
            tensor_from_file_fxn = build_incidence_tensor_from_file(INCIDENCE_CSV, diagnosis_column=diagnosis2column[diagnosis], incidence_only=True)
            return TensorMap(f'{name}_newest', Interpretation.CATEGORICAL, path_prefix=PARTNERS_PREFIX, channel_map=_diagnosis_channels(diagnosis, incidence_only=True), tensor_from_file=tensor_from_file_fxn)

        # Build time to event TensorMaps
        name = f'cox_{diagnosis}'
        if name == desired_map_name:
            tff = loyalty_time_to_event(INCIDENCE_CSV, diagnosis_column=diagnosis2column[diagnosis])
            return TensorMap(f'{name}_newest', Interpretation.TIME_TO_EVENT, path_prefix=PARTNERS_PREFIX, tensor_from_file=tff)
        name = f'incident_cox_{diagnosis}'
        if name == desired_map_name:
            tff = loyalty_time_to_event(INCIDENCE_CSV, diagnosis_column=diagnosis2column[diagnosis], incidence_only=True)
            return TensorMap(f'{name}_newest', Interpretation.TIME_TO_EVENT, path_prefix=PARTNERS_PREFIX, tensor_from_file=tff)

        # Build survival curve TensorMaps
        for days_window in [1825]:
            name = f'survival_{diagnosis}_{days_window}'
            if name == desired_map_name:
                tff = _survival_from_file(days_window, INCIDENCE_CSV, diagnosis_column=diagnosis2column[diagnosis])
                return TensorMap(f'{name}', Interpretation.SURVIVAL_CURVE, path_prefix=PARTNERS_PREFIX, shape=(50,), days_window=days_window, tensor_from_file=tff)
            name = f'incident_survival_{diagnosis}'
            if name == desired_map_name:
                tff = _survival_from_file(days_window, INCIDENCE_CSV, diagnosis_column=diagnosis2column[diagnosis], incidence_only=True)
                return TensorMap(f'{name}', Interpretation.SURVIVAL_CURVE, path_prefix=PARTNERS_PREFIX, shape=(50,), days_window=days_window, tensor_from_file=tff)
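
The factory above recognizes four name families per diagnosis. A hedged sketch of the patterns, using atrial_fibrillation as the key:

prevalence = make_partners_diagnosis_maps('diagnosis_atrial_fibrillation')
incident = make_partners_diagnosis_maps('incident_diagnosis_atrial_fibrillation')
cox = make_partners_diagnosis_maps('cox_atrial_fibrillation')
survival = make_partners_diagnosis_maps('survival_atrial_fibrillation_1825')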
Example #19
 def __init__(
     self,
     *,
     tensor_map: TensorMap,
     dense_blocks: List[int],
     conv_type: str,
     conv_width: List[int],
     conv_x: List[int],
     conv_y: List[int],
     conv_z: List[int],
     block_size: int,
     activation: str,
     conv_normalize: str,
     conv_regularize: str,
     conv_regularize_rate: float,
     pool_x: int,
     pool_y: int,
     pool_z: int,
     u_connect_parents: List[TensorMap] = None,
     **kwargs,
 ):
     self.tensor_map = tensor_map
     if not self.can_apply():
         return
     dimension = tensor_map.axes()
     x_filters = _repeat_dimension(conv_width if dimension == 2 else conv_x,
                                   len(dense_blocks))
     y_filters = _repeat_dimension(conv_y, len(dense_blocks))
     z_filters = _repeat_dimension(conv_z, len(dense_blocks))
     self.dense_conv_blocks = [
         DenseConvolutional(
             dimension=tensor_map.axes(),
             conv_layer_type=conv_type,
             filters=filters,
             conv_x=[x] * block_size,
             conv_y=[y] * block_size,
             conv_z=[z] * block_size,
             block_size=block_size,
             activation=activation,
             normalization=conv_normalize,
             regularization=conv_regularize,
             regularization_rate=conv_regularize_rate,
         ) for filters, x, y, z in zip(dense_blocks, x_filters, y_filters,
                                       z_filters)
     ]
     conv_layer, _ = _conv_layer_from_kind_and_dimension(
         dimension, 'conv', conv_x, conv_y, conv_z)
     self.conv_label = conv_layer(tensor_map.shape[-1],
                                  _one_by_n_kernel(dimension),
                                  activation=tensor_map.activation,
                                  name=tensor_map.output_name())
     self.upsamples = [
         _upsampler(dimension, pool_x, pool_y, pool_z)
         for _ in range(len(dense_blocks) + 1)
     ]
     self.u_connect_parents = u_connect_parents or []
     self.start_shape = _start_shape_before_pooling(
         num_upsamples=len(dense_blocks),
         output_shape=tensor_map.shape,
         upsample_rates=[pool_x, pool_y, pool_z],
         channels=dense_blocks[-1])
     self.reshape = FlatToStructure(output_shape=self.start_shape,
                                    activation=activation,
                                    normalization=conv_normalize)
      logging.info(
          f'Built a decoder with {len(self.dense_conv_blocks)} dense conv blocks, reshaping to {self.start_shape}'
      )
Example #20
def normalized_first_date(tm: TensorMap, hd5: h5py.File, dependents=None):
    tensor = get_tensor_at_first_date(hd5, tm.path_prefix, tm.name)
    if tm.axes() > 1:
        return pad_or_crop_array_to_shape(tm.shape, tensor)
    else:
        return tensor
Example #21
from typing import Dict

import h5py
import numpy as np

from ml4h.TensorMap import TensorMap, Interpretation


def mnist_image_from_hd5(tm: TensorMap,
                         hd5: h5py.File,
                         dependents: Dict = {}) -> np.ndarray:
    return np.array(hd5['mnist_image'])


mnist_image = TensorMap('mnist_image',
                        shape=(28, 28, 1),
                        tensor_from_file=mnist_image_from_hd5)


def mnist_label_from_hd5(tm: TensorMap,
                         hd5: h5py.File,
                         dependents: Dict = {}) -> np.ndarray:
    one_hot = np.zeros(tm.shape, dtype=np.float32)
    one_hot[int(hd5['mnist_label'][0])] = 1.0
    return one_hot


mnist_label = TensorMap(
    'mnist_label',
    Interpretation.CATEGORICAL,
    tensor_from_file=mnist_label_from_hd5,
    channel_map={f'digit_{i}': i for i in range(10)},  # ten classes implied by the label index; names illustrative
)
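
A hedged end-to-end sketch of these two maps against a handwritten HD5 file; the dataset layout is assumed from the reader functions above, and the categorical shape assumes TensorMap infers (10,) from the channel map:

with h5py.File('sample.hd5', 'w') as hd5:  # hypothetical file
    hd5.create_dataset('mnist_image', data=np.zeros((28, 28, 1), dtype=np.float32))
    hd5.create_dataset('mnist_label', data=np.array([7.0]))

with h5py.File('sample.hd5', 'r') as hd5:
    image = mnist_image.tensor_from_file(mnist_image, hd5)
    label = mnist_label.tensor_from_file(mnist_label, hd5)
    assert image.shape == (28, 28, 1) and label[7] == 1.0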
Example #22
import os
import h5py
import numpy as np
from typing import List, Tuple, Dict
from itertools import product

from ml4h.defines import TENSOR_EXT
from ml4h.TensorMap import TensorMap, Interpretation

CONTINUOUS_TMAPS = [
    TensorMap(f'{n}d_cont',
              shape=tuple(range(2, n + 2)),
              interpretation=Interpretation.CONTINUOUS) for n in range(1, 6)
]
CATEGORICAL_TMAPS = [
    TensorMap(
        f'{n}d_cat',
        shape=tuple(range(2, n + 2)),
        interpretation=Interpretation.CATEGORICAL,
        channel_map={f'c_{i}': i
                     for i in range(n + 1)},
    ) for n in range(1, 6)
]
LANGUAGE_TMAP_1HOT_WINDOW = [
    TensorMap(
        f'language_1hot_window',
        shape=(32, 26),
        interpretation=Interpretation.LANGUAGE,
        channel_map={f'c_{i}': i
                     for i in range(26)},
    ),
]
Example #23
            data = tm.hd5_first_dataset_in_group(
                hd5,
                key_prefix=f'{tm.path_prefix}/{k}',
            )
            drinks += float(data[0])
        return np.array([drinks], dtype=np.float32)

    return alcohol_from_file


log_25781_2 = TensorMap(
    '25781_Total-volume-of-white-matter-hyperintensities-from-T1-and-T2FLAIR-images_2_0',
    loss='logcosh',
    path_prefix='continuous',
    normalization={
        'mean': 7,
        'std': 8,
    },
    tensor_from_file=preprocess_with_function(np.log),
    channel_map={'white-matter-hyper-intensities': 0},
)

weight_lbs_2 = TensorMap(
    'weight_lbs',
    Interpretation.CONTINUOUS,
    normalization={
        'mean': 168.74,
        'std': 34.1,
    },
    loss='logcosh',
    channel_map={'weight_lbs': 0},
)
Example #24
 def __init__(
         self,
         tensor_map: TensorMap,
 ):
     self.tensor_map = tensor_map
     if not self.can_apply():
         return
      self.dense = Dense(tensor_map.shape[-1], activation=tensor_map.activation, name=tensor_map.output_name())
Example #25
def _weighted_batch(in_batch: Batch, out_batch: Batch, return_paths: bool,
                    paths: List[Path], sample_weight: TensorMap):
    sample_weights = [in_batch.pop(sample_weight.input_name()).flatten()
                      ] * len(out_batch)
    return (in_batch, out_batch, sample_weights,
            paths) if return_paths else (in_batch, out_batch, sample_weights)
Example #26
from ml4h.TensorMap import TensorMap, Interpretation
from ml4h.defines import StorageType
from ml4h.metrics import weighted_crossentropy

diploid_cm = {
    'homozygous_reference': 0,
    'heterozygous': 1,
    'homozygous_variant': 2
}
rs3829740 = TensorMap('rs3829740',
                      Interpretation.CATEGORICAL,
                      channel_map=diploid_cm)
rs2234962 = TensorMap('rs2234962',
                      Interpretation.CATEGORICAL,
                      channel_map=diploid_cm)
rs2042995 = TensorMap('rs2042995',
                      Interpretation.CATEGORICAL,
                      channel_map=diploid_cm)

rs3829740_weighted = TensorMap('rs3829740',
                               Interpretation.CATEGORICAL,
                               channel_map=diploid_cm,
                               loss=weighted_crossentropy([1, 1, 1.5],
                                                          'rs3829740'))
rs2234962_weighted = TensorMap('rs2234962',
                               Interpretation.CATEGORICAL,
                               channel_map=diploid_cm,
                               loss=weighted_crossentropy([.8, 1, 1.5],
                                                          'rs2234962'))
rs2042995_weighted = TensorMap('rs2042995',
                               Interpretation.CATEGORICAL,
                               channel_map=diploid_cm)
Example #27
        if incidence_only and censor_date <= assess_date:
            raise ValueError(f'{tm.name} only considers incident diagnoses')

        tensor = np.zeros(tm.shape, dtype=np.float32)
        tensor[0] = has_disease
        tensor[1] = (censor_date - assess_date).days
        return tensor

    return _cox_tensor_from_file


enroll_cad_hazard = TensorMap(
    'coronary_artery_disease',
    Interpretation.SURVIVAL_CURVE,
    shape=(50, ),
    days_window=DAYS_IN_5_YEARS,
    tensor_from_file=_survival_tensor('dates/enroll_date', DAYS_IN_5_YEARS),
)
enroll_hyp_hazard = TensorMap(
    'hypertension',
    Interpretation.SURVIVAL_CURVE,
    shape=(50, ),
    days_window=DAYS_IN_5_YEARS,
    tensor_from_file=_survival_tensor('dates/enroll_date', DAYS_IN_5_YEARS),
)
enroll_afib_hazard = TensorMap(
    'atrial_fibrillation_or_flutter',
    Interpretation.SURVIVAL_CURVE,
    shape=(50, ),
    days_window=DAYS_IN_5_YEARS,
    tensor_from_file=_survival_tensor('dates/enroll_date', DAYS_IN_5_YEARS),
)
Example #28
                        writer_segmented.SetFileName(
                            os.path.join(
                                save_path,
                                f'{tm.name}_segmented_{ds_i}_{ds_j}_{s}.vtp',
                            ), )
                        writer_segmented.Update()
        return tensor

    return mri_projected_segmentation


cine_segmented_lax_2ch_proj_from_sax = TensorMap(
    'cine_segmented_lax_2ch_proj_from_sax',
    Interpretation.CONTINUOUS,
    shape=(256, 256, 50),
    loss='logcosh',
    tensor_from_file=_make_mri_projected_segmentation_from_file(
        'cine_segmented_lax_2ch',
        MRI_SEGMENTED,
    ),
)
cine_segmented_lax_3ch_proj_from_sax = TensorMap(
    'cine_segmented_lax_3ch_proj_from_sax',
    Interpretation.CONTINUOUS,
    shape=(256, 256, 50),
    loss='logcosh',
    tensor_from_file=_make_mri_projected_segmentation_from_file(
        'cine_segmented_lax_3ch',
        MRI_SEGMENTED,
    ),
)
cine_segmented_lax_4ch_proj_from_sax = TensorMap(
    'cine_segmented_lax_4ch_proj_from_sax',
    Interpretation.CONTINUOUS,
    shape=(256, 256, 50),
    loss='logcosh',
    tensor_from_file=_make_mri_projected_segmentation_from_file(
        'cine_segmented_lax_4ch',
        MRI_SEGMENTED,
    ),
)
Example #29
def reshape_resting_ecg_to_tidy(
    sample_id: Union[int, str], folder: Optional[str] = None, tmap: TensorMap = DEFAULT_RESTING_ECG_SIGNAL_TMAP,
) -> pd.DataFrame:
  """Wrangle resting ECG data to tidy.

  Args:
    sample_id: The id of the ECG sample to retrieve.
    folder: The local or Cloud Storage folder under which the files reside.
    tmap: The TensorMap to use for ECG input.

  Returns:
    A pandas DataFrame in tidy format; on failure, prints a notebook-friendly warning and returns an empty DataFrame.
  """
  if folder is None:
    folder = get_resting_ecg_hd5_folder(sample_id)

  data: Dict[str, Any] = {'lead': [], 'raw': [], 'ts_reference': [], 'filtered': [], 'filtered_1': [], 'filtered_2': []}

  with tempfile.TemporaryDirectory() as tmpdirname:
    sample_hd5 = str(sample_id) + '.hd5'
    local_path = os.path.join(tmpdirname, sample_hd5)
    try:
      tf.io.gfile.copy(src=os.path.join(folder, sample_hd5), dst=local_path)
    except (tf.errors.NotFoundError, tf.errors.PermissionDeniedError) as e:
      print(f'''Warning: Resting ECG not available for sample {sample_id} in folder {folder}.
      Use the folder parameter to read HD5s from a different directory or bucket.\n\n{e.message}''')
      return pd.DataFrame(data)

    with h5py.File(local_path, mode='r') as hd5:
      try:
        signals = tmap.tensor_from_file(tmap, hd5)
      except (KeyError, ValueError) as e:
        print(f'''Warning: Resting ECG TMAP {tmap.name} not available for sample {sample_id}.
        Use the tmap parameter to choose a different TMAP.\n\n{e}''')
        _examine_available_keys(hd5)
        return pd.DataFrame(data)
      for (lead, channel) in ECG_REST_LEADS.items():
        signal = signals[:, channel]
        signal_length = len(signal)
        data['raw'].extend(signal)
        data['lead'].extend([lead] * signal_length)
        data['ts_reference'].extend(np.array([i*1./(SAMPLING_RATE+1.) for i in range(0, signal_length)]))
        filtered, _, _ = filter_signal(
            signal=signal,
            ftype='FIR',
            band='bandpass',
            order=int(0.3 * SAMPLING_RATE),
            frequency=[.9, 50],
            sampling_rate=SAMPLING_RATE,
        )
        data['filtered'].extend(filtered)
        filtered_1, _, _ = filter_signal(
            signal=signal,
            ftype='FIR',
            band='bandpass',
            order=int(0.3 * SAMPLING_RATE),
            frequency=[.9, 20],
            sampling_rate=SAMPLING_RATE,
        )
        data['filtered_1'].extend(filtered_1)
        filtered_2, _, _ = filter_signal(
            signal=signal,
            ftype='FIR',
            band='bandpass',
            order=int(0.3 * SAMPLING_RATE),
            frequency=[.9, 30],
            sampling_rate=SAMPLING_RATE,
        )
        data['filtered_2'].extend(filtered_2)

  signal_df = pd.DataFrame(data)
  # Convert the raw signal to mV.
  signal_df['raw_mV'] = signal_df['raw'] * RAW_SCALE
  signal_df['filtered_mV'] = signal_df['filtered'] * RAW_SCALE
  signal_df['filtered_1_mV'] = signal_df['filtered_1'] * RAW_SCALE
  signal_df['filtered_2_mV'] = signal_df['filtered_2'] * RAW_SCALE
  # Reshape to tidy (long format).
  tidy_signal_df = signal_df.melt(
      id_vars=['lead', 'ts_reference'],
      value_vars=['raw_mV', 'filtered_mV', 'filtered_1_mV', 'filtered_2_mV'],
      var_name='filtering', value_name='signal_mV',
  )

  # The leads have a meaningful order, apply the order to this column.
  lead_factor_type = pd.api.types.CategoricalDtype(
      categories=[
          'strip_I', 'strip_aVR', 'strip_V1', 'strip_V4',
          'strip_II', 'strip_aVL', 'strip_V2', 'strip_V5',
          'strip_III', 'strip_aVF', 'strip_V3', 'strip_V6',
      ],
      ordered=True,
  )
  tidy_signal_df['lead'] = tidy_signal_df.lead.astype(lead_factor_type)

  return tidy_signal_df
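
A hedged usage sketch; the sample id is hypothetical, and any long-format plotting tool can consume the tidy output:

tidy_df = reshape_resting_ecg_to_tidy(sample_id=1234567)  # hypothetical id
if not tidy_df.empty:
    # One row per (lead, timestamp, filtering) triple, ready for faceted plotting.
    print(tidy_df.groupby(['lead', 'filtering'], observed=True).signal_mV.describe())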