def buildModelColumns(self):
        """Return the list of feature columns for the German-named features.

        Returns:
            A list holding, in this order: the numeric columns
            'Eintrittsalter' and 'Verweildauer', an indicator column wrapping
            the categorical 'Geschlecht' column, and 8-dimensional embedding
            columns for the categorical 'Hauptdiagnose' and 'DK' columns.
        """
        fc = tf.feature_column

        # Categorical column; vocabulary is supplied by the dataset options.
        sex = fc.categorical_column_with_vocabulary_list(
            'Geschlecht',
            self.dataset_options.getFeatureCategories('Geschlecht'))

        # Numeric columns.
        admission_age = fc.numeric_column('Eintrittsalter', dtype=tf.float32)
        length_of_stay = fc.numeric_column('Verweildauer', dtype=tf.float32)

        # Diagnosis columns: 'Hauptdiagnose' takes its vocabulary from the
        # dataset options, 'DK' directly from the very-light grouping helper.
        main_diagnosis = fc.categorical_column_with_vocabulary_list(
            'Hauptdiagnose',
            self.dataset_options.getFeatureCategories('Hauptdiagnose'))
        secondary_diagnosis = fc.categorical_column_with_vocabulary_list(
            'DK', helpers.getDKverylightGrouping())

        return [
            admission_age,
            length_of_stay,
            fc.indicator_column(sex),
            fc.embedding_column(
                categorical_column=main_diagnosis, dimension=8),
            fc.embedding_column(
                categorical_column=secondary_diagnosis, dimension=8),
        ]
    def buildModelColumns(self):
        """Return the feature columns, with a shared diagnosis embedding.

        The categorical columns 'main_diag' and 'diag' are passed together to
        ``shared_embedding_columns`` with dimension 128. The numeric columns
        'age_dsch' and 'los' and a 1-dimensional embedding of 'gender' are
        appended to the list that call returns.

        Side effect: prints the number of feature columns.

        Returns:
            The list returned by ``shared_embedding_columns``, extended in
            place with the three additional columns.
        """
        fc = tf.feature_column
        categories_of = self.dataset_options.getFeatureCategories

        gender = fc.categorical_column_with_vocabulary_list(
            'gender', categories_of('gender'))
        main_diag = fc.categorical_column_with_vocabulary_list(
            'main_diag', categories_of('main_diag'))

        age = fc.numeric_column('age_dsch', dtype=tf.float32)
        los = fc.numeric_column('los', dtype=tf.float32)

        other_diag = fc.categorical_column_with_vocabulary_list(
            'diag', helpers.getDKverylightGrouping())

        feature_columns = fc.shared_embedding_columns(
            [main_diag, other_diag], dimension=128)
        gender_embedding = fc.embedding_column(categorical_column=gender,
                                               dimension=1)
        feature_columns.extend([age, los, gender_embedding])

        print('len(feature_columns): {}'.format(len(feature_columns)))
        return feature_columns
Ejemplo n.º 3
0
 def getDiagGroupNames(self):
     """Return the diagnosis group names for the configured grouping scheme.

     The scheme is read from ``self.grouping``. 'verylightgrouping',
     'lightgrouping' and 'grouping' each delegate to the matching helper;
     any other value yields an empty list.
     """
     scheme = self.grouping
     if scheme == 'verylightgrouping':
         return helpers.getDKverylightGrouping()
     if scheme == 'lightgrouping':
         return helpers.getDKlightGrouping()
     if scheme == 'grouping':
         return helpers.getDKgrouping()
     # Unknown scheme: no group names.
     return []
Ejemplo n.º 4
0
 def __getGroupNames(self, group):
     """Return the names for one subgroup, with 'Fall' as the first entry.

     Args:
         group: Subgroup identifier; one of 'CHOP', 'DK' or 'OE'.

     Returns:
         A new list: 'Fall' followed by the names supplied by the matching
         grouping helper. For 'DK' the helper is selected by
         ``self.options.getGroupingName()`` ('grouping', 'lightgrouping' or
         'verylightgrouping').

     Raises:
         SystemExit: with exit status 1 when ``group`` or the DK grouping
             scheme is not recognised (a message is printed first).
     """
     if group == 'CHOP':
         names = getCHOPgrouping()
     elif group == 'DK':
         scheme = self.options.getGroupingName()
         if scheme == 'grouping':
             names = getDKgrouping()
         elif scheme == 'lightgrouping':
             names = getDKlightGrouping()
         elif scheme == 'verylightgrouping':
             names = getDKverylightGrouping()
         else:
             print('grouping scheme ist not known...exit')
             # A bare sys.exit() reports status 0 (success); this is an
             # error path, so exit with a non-zero status.
             sys.exit(1)
     elif group == 'OE':
         names = getOEgrouping()
     else:
         print('group name is not known...exit')
         sys.exit(1)
     # Build a new list instead of inserting in place, so a list object
     # owned by the grouping helper is never modified.
     return ['Fall'] + list(names)
Ejemplo n.º 5
0
    def buildModelColumns(self):
        """Return the list of feature columns for the administrative features.

        Layout of the returned list, in order:
          * numeric columns 'age_dsch' and 'los';
          * indicator columns for 'adm_src', 'private_flag',
            'Short_Stay_ED_Flag' and 'transfer_event_flag';
          * 2-dimensional embeddings for 'gender', 'event_type', 'end_type',
            'facility_type', 'agency_type' and 'purchaser';
          * 8-dimensional embeddings for 'main_diag' and 'diag'.

        Side effect: prints the number of feature columns.
        """
        fc = tf.feature_column

        def vocabulary_column(key):
            # Categorical column whose vocabulary is looked up under `key`.
            return fc.categorical_column_with_vocabulary_list(
                key, self.dataset_options.getFeatureCategories(key))

        gender = vocabulary_column('gender')
        adm_src = vocabulary_column('adm_src')
        # NOTE(review): 'adm_type' is built but never added to the returned
        # list - confirm whether leaving it out is intentional.
        adm_type = vocabulary_column('adm_type')
        event_type = vocabulary_column('event_type')
        end_type = vocabulary_column('end_type')
        facility_type = vocabulary_column('facility_type')
        agency_type = vocabulary_column('agency_type')
        private_flag = vocabulary_column('private_flag')
        purchaser = vocabulary_column('purchaser')
        short_stay_flag = vocabulary_column('Short_Stay_ED_Flag')
        transfer_event_flag = vocabulary_column('transfer_event_flag')
        main_diag = vocabulary_column('main_diag')

        age = fc.numeric_column('age_dsch', dtype=tf.float32)
        los = fc.numeric_column('los', dtype=tf.float32)

        # 'diag' takes its vocabulary directly from the grouping helper.
        other_diag = fc.categorical_column_with_vocabulary_list(
            'diag', helpers.getDKverylightGrouping())

        one_hot_sources = [
            adm_src, private_flag, short_stay_flag, transfer_event_flag,
        ]
        small_embedding_sources = [
            gender, event_type, end_type, facility_type, agency_type,
            purchaser,
        ]
        diagnosis_sources = [main_diag, other_diag]

        feature_columns = [age, los]
        feature_columns.extend(
            fc.indicator_column(column) for column in one_hot_sources)
        feature_columns.extend(
            fc.embedding_column(categorical_column=column, dimension=2)
            for column in small_embedding_sources)
        feature_columns.extend(
            fc.embedding_column(categorical_column=column, dimension=8)
            for column in diagnosis_sources)

        print('len(feature_columns): {}'.format(len(feature_columns)))
        return feature_columns
Ejemplo n.º 6
0
    '1', '2', '3', '4', '10', '11', '12', '13', '14', '15', '16', '17', '18',
    '19', '20', '21', '22', '23', '24', '25', '99'
]
# Category value lists registered per column name. Values are strings and the
# order of each list is kept exactly as written.
# NOTE(review): presumably these are the vocabularies handed out by
# getFeatureCategories(...) - confirm against the dataset options class.
CATEGORICAL_DATA['agency_type'] = [
    '1', '2', '9', '10', '11', '12', '13', '14', '8'
]
CATEGORICAL_DATA['private_flag'] = ['N', 'Y']
CATEGORICAL_DATA['purchaser'] = [
    '6', '17', '19', '20', '33', '34', '35', '55', '98', 'A0', '1', '2', '3',
    '4', '5', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '18',
    'A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'A7'
]
CATEGORICAL_DATA['Short_Stay_ED_Flag'] = ['N', 'Y']
# Disabled entry: no category list is registered for 'early_readmission_flag'.
#CATEGORICAL_DATA['early_readmission_flag'] = ['N', 'Y']
CATEGORICAL_DATA['transfer_event_flag'] = ['N', 'Y']
# 'main_diag' takes its categories from the grouping helper, not a literal.
CATEGORICAL_DATA['main_diag'] = helpers.getDKverylightGrouping()

# Column name -> Python type. Every column listed here maps to ``str``.
# NOTE(review): presumably used as explicit dtypes when the data is loaded -
# confirm against the loader.
EXPLICIT_DATA_TYPES = dict.fromkeys(
    (
        'gender',
        'adm_src',
        'adm_type',
        'event_type',
        'end_type',
        'facility_type',
        'agency_type',
        'private_flag',
        'purchaser',
        'Short_Stay_ED_Flag',
        'early_readmission_flag',
        'transfer_event_flag',
    ),
    str,
)
Ejemplo n.º 7
0
                              'NE Neue.Neueint', 'WE:Wiedereintr.', 'N1:Nierenstein1', 'N2:Nierenstein2',
                              'S: Selbsteinw.'];
# Category value lists registered per column name. Each assignment creates
# its own list object, including columns whose values coincide.
CATEGORICAL['Entlassart'] = [
    'iniDri', 'exPat', 'gSpit', 'vSpit', 'sSpit', 'Plan', 'inPat', 'iniBeh',
]
CATEGORICAL['Eintrittsart'] = ['Ver', 'Not', 'Ang', 'Geb', 'Int', 'unb']
CATEGORICAL['EntlassBereich'] = ['SaO', 'Med', 'Gyn', 'Oth', 'N.A.']
CATEGORICAL['Versicherungsklasse'] = ['A', 'S', 'P', 'H']

# Calendar-derived columns: month, weekday and year of discharge/admission.
CATEGORICAL['Entlassmonat'] = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]
CATEGORICAL['Aufnahmemonat'] = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]
CATEGORICAL['Aufnahmetag'] = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
CATEGORICAL['Entlasstag'] = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
CATEGORICAL['Entlassjahr'] = [
    '2011', '2012', '2013', '2014', '2015', '2016', '2017',
]
CATEGORICAL['Aufnahmejahr'] = [
    '2011', '2012', '2013', '2014', '2015', '2016', '2017',
]

CATEGORICAL['Liegestatus'] = ['kurz', 'norm', 'lang', 'vap', 'opti', 'unb']
CATEGORICAL['Geschlecht'] = ['weiblich', 'maennlich']
CATEGORICAL['Forschungskonsent'] = ['ein', 'unb']

# Columns whose categories are produced by grouping helpers.
CATEGORICAL['Hauptdiagnose'] = helpers.getDKverylightGrouping()
CATEGORICAL['AufnehmOE'] = helpers.getOEgrouping()
CATEGORICAL['EntlassOE'] = helpers.getOEgrouping()
CATEGORICAL['DRGCode'] = helpers.getDRGgrouping()

# Identifiers of the subgroups.
SUBGROUPS = ['OE', 'DK', 'CHOP']

# NOTE(review): presumably the day window that defines an early
# readmission - confirm where this constant is consumed.
NUM_DAYS_READMISSION = 18

# Names referenced through constants rather than repeated string literals.
EARLY_READMISSION_FLAG = 'Wiederkehrer'
EVENT_FLAG = 'Fall'
HAUPTDIAGNOSE = 'Hauptdiagnose'
NEBENDIAGNOSE = 'DK'

# Names of the additional features.
NEW_FEATURES = [
    'previous_visits',
    'ratio_los_age',
    'ratio_numDK_age',
    'ratio_los_numDK',
    'ratio_numCHOP_age',
    'ratio_los_numOE',
    'ratio_numOE_age',
    'mult_los_numCHOP',
    'mult_equalOE_numDK',
    'ratio_drg_los_alos',
]
import sys
import shutil

import tensorflow as tf
from tensorflow.python.summary import summary

import helpers.helpers as helpers

diag_group_names = helpers.getDKverylightGrouping()


class AutoEncoderEstimator:
    def __init__(self, feature_columns, flags):
        self.feature_columns = feature_columns
        self.flags = flags
        self.estimator = None
        return

    def _add_hidden_layer_summary(self, value, tag):
        """Record two summaries for ``value`` under the prefix ``tag``.

        Emits a scalar summary named '<tag>/fraction_of_zero_values' with
        ``tf.nn.zero_fraction(value)`` and a histogram summary named
        '<tag>/activation' with ``value`` itself.
        """
        zero_fraction_name = '%s/fraction_of_zero_values' % tag
        summary.scalar(zero_fraction_name, tf.nn.zero_fraction(value))

        activation_name = '%s/activation' % tag
        summary.histogram(activation_name, value)

    def _dense_batch_relu(self, input, num_nodes, phase, layer_name, batchnorm,
                          dropout):
        if batchnorm:
            out = tf.layers.dense(input,
                                  num_nodes,
                                  activation=tf.nn.relu,
                                  name=layer_name)
            out = tf.layers.batch_normalization(out, training=phase)
Ejemplo n.º 9
0
    # Directory of the trained model whose 'main_diag' embedding weights are
    # loaded below; the label file is written into the same directory.
    dirNN = '/Users/towyku74/UniBas/sciCore/projects/PATREC/trained_models/dev/nz_20012011_reduction_FUSION_embedding_verylightgrouping_20_10_10_dropout_0.5_learningrate_0.05_batchnorm_True_batchsize_640/'
    filename_weights_main_diag = dirNN + 'weights_embedding_main_diag.npy'
    weights = np.load(filename_weights_main_diag)

    num_diags = 2600
    num_categories = 26
    cnt = 0
    labels = np.zeros(num_diags)
    labels_maincat = []
    # 26 main categories x 100 entries each fill all 2600 label slots:
    # labels[] gets the numeric category index, labels_maincat the matching
    # entry of `alphabet` (defined outside this fragment).
    for k in range(0, 26):
        for l in range(0, 100):
            labels[cnt] = k
            labels_maincat.append(alphabet[k])
            cnt += 1

    labels_finegrained = helpers.getDKverylightGrouping()
    filename_labels = dirNN + 'labels_cat.tsv'
    # Write one tab-separated row per label: main category, fine-grained
    # category. The context manager closes the file even if a write or an
    # out-of-range index raises, which the manual open()/close() pair did not.
    with open(filename_labels, 'w') as file_labels:
        file_labels.write('main_category' + '\t' + 'category' + '\n')
        for k in range(0, len(labels_maincat)):
            file_labels.write(labels_maincat[k] + '\t' +
                              labels_finegrained[k] + '\n')

    # One colour per main category.
    colors = plt.cm.rainbow(np.linspace(0, 1, num_categories))

    # Two-component projections of the loaded weights, via PCA and via TSNE.
    pca = PCA(n_components=2)
    weights_2d_pca = pca.fit_transform(weights)

    tsne = TSNE(n_components=2)
    weights_2d_tsne = tsne.fit_transform(weights)