Example #1
target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')

# Build readers, discretizers, normalizers
train_reader = InHospitalMortalityReader(
    dataset_dir=os.path.join(args.data, 'train'),
    listfile=os.path.join(args.data, 'train_listfile.csv'),
    period_length=48.0)

val_reader = InHospitalMortalityReader(
    dataset_dir=os.path.join(args.data, 'train'),
    listfile=os.path.join(args.data, 'val_listfile.csv'),
    period_length=48.0)

discretizer = Discretizer(timestep=float(args.timestep),
                          store_masks=True,
                          impute_strategy='previous',
                          start_time='zero')

discretizer_header = discretizer.transform(
    train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [
    i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1
]

normalizer = Normalizer(
    fields=cont_channels)  # choose here which columns to standardize
normalizer_state = args.normalizer_state
if normalizer_state is None:
    normalizer_state = 'ihm_ts{}.input_str:{}.start_time:zero.normalizer'.format(
        args.timestep, args.imputation)
    normalizer_state = os.path.join(os.path.dirname(__file__),
                                    normalizer_state)
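The snippet stops after building the fallback state path; a minimal continuation sketch, reusing only calls that appear elsewhere on this page (Normalizer.load_params), plus a Normalizer.transform call that is assumed to apply the stored means and standard deviations:

normalizer.load_params(normalizer_state)

# Discretize one example and standardize its continuous channels
# (read_example and transform are used the same way in the snippets above;
# normalizer.transform is an assumed API, not shown on this page).
ex = train_reader.read_example(0)
dense, _ = discretizer.transform(ex["X"], end=ex["t"])
inputs = normalizer.transform(dense)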
Example #2
print(args)

if args.small_part:
    args.save_every = 2**30

target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')

# Build readers, discretizers, normalizers
train_reader = PhenotypingReader(dataset_dir='../../data/phenotyping/train/',
                                 listfile='../../data/phenotyping/train_listfile.csv')

val_reader = PhenotypingReader(dataset_dir='../../data/phenotyping/train/',
                               listfile='../../data/phenotyping/val_listfile.csv')

discretizer = Discretizer(timestep=float(args.timestep),
                          store_masks=True,
                          impute_strategy='previous',
                          start_time='zero')

discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]

normalizer = Normalizer(fields=cont_channels)  # choose here which columns to standardize
normalizer.load_params('ph_ts{}.input_str:previous.start_time:zero.normalizer'.format(args.timestep))

args_dict = dict(args._get_kwargs())
args_dict['header'] = discretizer_header
args_dict['task'] = 'ph'
args_dict['num_classes'] = 25
args_dict['target_repl'] = target_repl

# Build the model
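target_repl enables target replication: the sequence label is also predicted at every timestep and the two losses are mixed. A hedged Keras-style sketch of the compile step, assuming a model with two outputs (a last-timestep prediction and per-timestep replicas), which is not shown in the snippet:

# Hypothetical: model has outputs [y_last, y_replicated]; the replicated
# per-timestep output is weighted by target_repl_coef, the main output
# by the remainder.
coef = args.target_repl_coef
model.compile(optimizer='adam',
              loss=['binary_crossentropy', 'binary_crossentropy'],
              loss_weights=[1 - coef, coef])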
Example #3
if args.deep_supervision:
    train_data_loader = common_utils.DeepSupervisionDataLoader(
        dataset_dir=os.path.join(args.data, 'train'),
        listfile=os.path.join(args.data, 'train_listfile.csv'),
        small_part=args.small_part)
    val_data_loader = common_utils.DeepSupervisionDataLoader(
        dataset_dir=os.path.join(args.data, 'train'),
        listfile=os.path.join(args.data, 'val_listfile.csv'),
        small_part=args.small_part)
else:
    train_reader = LengthOfStayReader(
        dataset_dir=os.path.join(args.data, 'train'),
        listfile=os.path.join(args.data, 'train_listfile.csv'))
    val_reader = LengthOfStayReader(
        dataset_dir=os.path.join(args.data, 'train'),
        listfile=os.path.join(args.data, 'val_listfile.csv'))

discretizer = Discretizer(timestep=args.timestep,
                          store_masks=True,
                          impute_strategy='previous',
                          start_time='zero')

if args.deep_supervision:
    discretizer_header = discretizer.transform(
        train_data_loader._data["X"][0])[1].split(',')
else:
    discretizer_header = discretizer.transform(
        train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [
    i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1
]

normalizer = Normalizer(
    fields=cont_channels)  # choose here which columns to standardize
normalizer_state = args.normalizer_state
Example #4
args = parser.parse_args()
print(args)

target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')
embeddings, word_indices = get_embeddings(corpus='claims_codes_hs', dim=300)

train_reader = ReadmissionReader(
    dataset_dir='/mnt/MIMIC-III-clean/readmission_cv2/data/',
    listfile='/mnt/MIMIC-III-clean/readmission_cv2/0_train_listfile801010.csv')

val_reader = ReadmissionReader(
    dataset_dir='/mnt/MIMIC-III-clean/readmission_cv2/data/',
    listfile='/mnt/MIMIC-III-clean/readmission_cv2/0_val_listfile801010.csv')

discretizer = Discretizer(timestep=float(args.timestep),
                          store_masks=True,
                          impute_strategy='previous',
                          start_time='zero')

N = train_reader.get_number_of_examples()
ret = common_utils.read_chunk(train_reader, N)
data = ret["X"]
ts = ret["t"]
labels = ret["y"]
names = ret["name"]
diseases_list = get_diseases(names, '/mnt/MIMIC-III-clean/data/')
diseases_embedding = disease_embedding(embeddings, word_indices, diseases_list)

discretizer_header = discretizer.transform(ret["X"][0])[1].split(',')
cont_channels = [
    i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1
]
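common_utils.read_chunk pulls all N training episodes into parallel lists. A minimal sketch of fitting a Normalizer over them one episode at a time, using only calls that appear in these snippets (discretizer.transform with end=t, and the same _feed_data used in Example #8 below):

normalizer = Normalizer(fields=cont_channels)
for X, t in zip(data, ts):
    # Discretize each raw episode up to its end time, then accumulate
    # running statistics for the continuous channels.
    dense, _ = discretizer.transform(X, end=t)
    normalizer._feed_data(dense)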
Example #5
listfiles_train_dir0 = base + listfiles_data + "/0_train_listfile801010.csv"
listfiles_val_dir0 = base + listfiles_data + "/0_val_listfile801010.csv"
listfiles_test_dir0 = base + listfiles_data + "/0_test_listfile801010.csv"

target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')
# Read embedding
embeddings, word_indices = get_embeddings(corpus='claims_codes_hs', dim=300)

train_reader = ReadmissionReader(dataset_dir=dataset_episode_dir,
                                 listfile=listfiles_train_dir0)

val_reader = ReadmissionReader(dataset_dir=dataset_episode_dir,
                               listfile=listfiles_val_dir0)

discretizer = Discretizer(timestep=float(args.timestep),
                          store_masks=True,
                          impute_strategy='previous',
                          start_time='zero')

N = train_reader.get_number_of_examples()
ret = common_utils.read_chunk(train_reader, N)
data = ret["X"]
ts = ret["t"]
labels = ret["y"]
names = ret["name"]
feature_cols = ['LOS', 'Hos_LOS', 'Num_Prev_Hos_Adm']
additional_features = get_additional_features(names)
additional_features_list = normalize_standard(additional_features, feature_cols).values.tolist()

diseases_list = get_diseases(names, dataset_subject_dir)
diseases_embedding = disease_embedding(embeddings, word_indices, diseases_list)
demographic = get_demographic(names, dataset_subject_dir)
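normalize_standard and get_additional_features are project-specific helpers not shown here. A minimal pandas sketch of the presumed behaviour of normalize_standard, column-wise z-scoring of the listed feature columns (the function body is an assumption):

import pandas as pd

def normalize_standard(df, cols):
    # Standardize the selected columns to zero mean / unit variance,
    # leaving every other column untouched.
    out = df.copy()
    out[cols] = (df[cols] - df[cols].mean()) / df[cols].std()
    return out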
Example #6
embeddings, word_indices = get_embeddings(corpus='claims_codes_hs', dim=300)

train_reader = ReadmissionReader(
    dataset_dir='/Users/jeffrey0925/MIMIC-III-clean/readmission_cv2/data/',
    listfile='/Users/jeffrey0925/MIMIC-III-clean/readmission_cv2/0_train_listfile801010.csv')

val_reader = ReadmissionReader(
    dataset_dir='/Users/jeffrey0925/MIMIC-III-clean/readmission_cv2/data/',
    listfile='/Users/jeffrey0925/MIMIC-III-clean/readmission_cv2/0_val_listfile801010.csv')

discretizer = Discretizer(timestep=float(args.timestep),
                          store_masks=True,
                          impute_strategy='previous',
                          start_time='zero')

N = train_reader.get_number_of_examples()
ret = common_utils.read_chunk(train_reader, N)
data = ret["X"]
ts = ret["t"]
labels = ret["y"]
names = ret["name"]
diseases_list = get_diseases(names, '/Users/jeffrey0925/MIMIC-III-clean/data/')
diseases_embedding = disease_embedding(embeddings, word_indices, diseases_list)
demographic = get_demographic(names,
                              '/Users/jeffrey0925/MIMIC-III-clean/data/')

age_means = sum(demographic[:][0])
age_std = statistics.stdev(demographic[:][0])
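A caveat on the two lines above: if demographic is a plain list of rows, demographic[:][0] copies the list and then takes the first row, not the age column, and sum(...) yields a total rather than a mean. A sketch of the presumed intent, assuming age is the first field of each row:

import statistics

ages = [row[0] for row in demographic]  # extract the age column
age_mean = statistics.mean(ages)
age_std = statistics.stdev(ages)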
Example #7
# Build readers, discretizers, normalizers
if args.deep_supervision:
    train_data_loader = common_utils.DeepSupervisionDataLoader(dataset_dir='../../data/decompensation/train/',
                                                               listfile='../../data/decompensation/train_listfile.csv',
                                                               small_part=args.small_part)
    val_data_loader = common_utils.DeepSupervisionDataLoader(dataset_dir='../../data/decompensation/train/',
                                                             listfile='../../data/decompensation/val_listfile.csv',
                                                             small_part=args.small_part)
else:
    train_reader = DecompensationReader(dataset_dir='../../data/decompensation/train/',
                                        listfile='../../data/decompensation/train_listfile.csv')
    val_reader = DecompensationReader(dataset_dir='../../data/decompensation/train/',
                                      listfile='../../data/decompensation/val_listfile.csv')

discretizer = Discretizer(timestep=args.timestep,
                          store_masks=True,
                          impute_strategy='previous',
                          start_time='zero')

if args.deep_supervision:
    discretizer_header = discretizer.transform(train_data_loader._data["X"][0])[1].split(',')
else:
    discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]

normalizer = Normalizer(fields=cont_channels)  # choose here which columns to standardize
normalizer.load_params('decomp_ts{}.input_str:previous.n1e5.start_time:zero.normalizer'.format(args.timestep))

args_dict = dict(args._get_kwargs())
args_dict['header'] = discretizer_header
args_dict['task'] = 'decomp'
Example #8
def main():
    parser = argparse.ArgumentParser(description='Script for creating a normalizer state - a file which stores the '
                                                 'means and standard deviations of columns of the output of a '
                                                 'discretizer, which are later used to standardize the input of '
                                                 'neural models.')
    parser.add_argument('--task', type=str, required=True,
                        choices=['ihm', 'decomp', 'los', 'pheno', 'multi'])
    parser.add_argument('--timestep', type=float, default=1.0,
                        help="Rate of the re-sampling to discretize time-series.")
    parser.add_argument('--impute_strategy', type=str, default='previous',
                        choices=['zero', 'next', 'previous', 'normal_value'],
                        help='Strategy for imputing missing values.')
    parser.add_argument('--start_time', type=str, choices=['zero', 'relative'],
                        help='Specifies the start time of discretization. Zero means to use the beginning of '
                             'the ICU stay. Relative means to use the time of the first ICU event')
    parser.add_argument('--store_masks', dest='store_masks', action='store_true',
                        help='Store masks that specify observed/imputed values.')
    parser.add_argument('--no-masks', dest='store_masks', action='store_false',
                        help='Do not store masks specifying observed/imputed values.')
    parser.add_argument('--n_samples', type=int, default=-1, help='How many samples to use to estimates means and '
                        'standard deviations. Set -1 to use all training samples.')
    parser.add_argument('--output_dir', type=str, help='Directory where the output file will be saved.',
                        default='.')
    parser.add_argument('--data', type=str, required=True, help='Path to the task data.')
    parser.set_defaults(store_masks=True)

    args = parser.parse_args()
    print(args)

    # create the reader
    reader = None
    dataset_dir = os.path.join(args.data, 'train')
    if args.task == 'ihm':
        reader = InHospitalMortalityReader(dataset_dir=dataset_dir, period_length=48.0)
    if args.task == 'decomp':
        reader = DecompensationReader(dataset_dir=dataset_dir)
    if args.task == 'los':
        reader = LengthOfStayReader(dataset_dir=dataset_dir)
    if args.task == 'pheno':
        reader = PhenotypingReader(dataset_dir=dataset_dir)
    if args.task == 'multi':
        reader = MultitaskReader(dataset_dir=dataset_dir)

    # create the discretizer
    discretizer = Discretizer(timestep=args.timestep,
                              store_masks=args.store_masks,
                              impute_strategy=args.impute_strategy,
                              start_time=args.start_time)
    # the normalizer fields must index the discretizer's output, whose header
    # includes the one-hot/mask channels (marked with "->"), so derive the
    # header from discretizer.transform rather than from the raw reader
    discretizer_header = discretizer.transform(reader.read_example(0)['X'])[1].split(',')
    continuous_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]

    # create the normalizer
    normalizer = Normalizer(fields=continuous_channels)

    # read all examples and store the state of the normalizer
    n_samples = args.n_samples
    if n_samples == -1:
        n_samples = reader.get_number_of_examples()

    for i in range(n_samples):
        if i % 1000 == 0:
            print('Processed {} / {} samples'.format(i, n_samples), end='\r')
        ret = reader.read_example(i)
        data, new_header = discretizer.transform(ret['X'], end=ret['t'])
        normalizer._feed_data(data)
    print('\n')

    file_name = '{}_ts_{:.1f}_impute_{}_start_time_{}_masks_{}_n_{}.normalizer'.format(
        args.task, args.timestep, args.impute_strategy, args.start_time, args.store_masks, n_samples)
    file_name = os.path.join(args.output_dir, file_name)
    print('Saving the state in {} ...'.format(file_name))
    normalizer._save_params(file_name)
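For reference, a typical invocation of the script above; the file name and data path are assumptions, since neither appears in the snippet:

# python create_normalizer_state.py --task ihm --timestep 1.0 \
#        --impute_strategy previous --start_time zero \
#        --data data/in-hospital-mortality/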
Example #9
test_starttime_path = conf.starttime_path_test

epochs = 50
learning_rate = 3e-4
batch_size = 8
bootstrap_decomp = BootStrapDecomp(k=1000, experiment_name=experiment)
bootstrap_los = BootStrapLos(experiment_name=experiment)
bootstrap_ihm = BootStrapIhm(experiment_name=experiment)
bootstrap_pheno = BootStrapPheno(experiment_name=experiment)
bootstrap_readmit = BootStrapReadmit(experiment_name=experiment)

# prepare discretizer and normalizer
conf = utils.get_config()

discretizer = Discretizer(timestep=conf.timestep,
                          store_masks=True,
                          impute_strategy='previous',
                          start_time='zero')

cont_channels = [2, 3, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58]

normalizer = Normalizer(fields=cont_channels)
normalizer_state = conf.normalizer_state
if normalizer_state is None:
    normalizer_state = 'mult_ts{}.input_str:previous.start_time:zero.n5e4.normalizer'.format(
        conf.timestep)
normalizer.load_params(normalizer_state)

# Model

text_model = Text_CNN(in_channels=1,
                      out_channels=128,
Example #10
listfiles_train_dir0 = base + listfiles_data + "/0_train_listfile801010.csv"
listfiles_val_dir0 = base + listfiles_data + "/0_val_listfile801010.csv"
listfiles_test_dir0 = base + listfiles_data + "/0_test_listfile801010.csv"

embeddings, word_indices = get_embeddings(corpus='claims_codes_hs', dim=300)

# Build readers, discretizers, normalizers
train_reader = ReadmissionReader(dataset_dir=dataset_episode_dir,
                                 listfile=listfiles_train_dir0)

val_reader = ReadmissionReader(dataset_dir=dataset_episode_dir,
                               listfile=listfiles_val_dir0)

test_reader = ReadmissionReader(dataset_dir=dataset_episode_dir,
                                listfile=listfiles_test_dir0)

discretizer = Discretizer(timestep=1.0,
                          store_masks=True,
                          impute_strategy='previous',
                          start_time='zero')

N = train_reader.get_number_of_examples()
ret = common_utils.read_chunk(train_reader, N)
data = ret["X"]
ts = ret["t"]
train_y = ret["y"]
train_names = ret["name"]
diseases_list = get_diseases(train_names, dataset_subject_dir)
diseases_embedding = disease_embedding(embeddings, word_indices, diseases_list)

d, discretizer_header, begin_pos, end_pos = discretizer.transform_reg(data[0])

discretizer_header = discretizer_header.split(',')
Example #11
if args.small_part:
    args.save_every = 2**30

target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')

# Build readers, discretizers, normalizers
train_reader = MultitaskReader(
    dataset_dir='../../data/multitask/train/',
    listfile='../../data/multitask/train_listfile.csv')

val_reader = MultitaskReader(dataset_dir='../../data/multitask/train/',
                             listfile='../../data/multitask/val_listfile.csv')

discretizer = Discretizer(timestep=args.timestep,
                          store_masks=True,
                          impute_strategy='previous',
                          start_time='zero')

discretizer_header = discretizer.transform(
    train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [
    i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1
]

normalizer = Normalizer(fields=cont_channels)  # choose here which columns to standardize
normalizer.load_params('mult_ts%s.input_str:%s.start_time:zero.normalizer' %
                       (args.timestep, args.imputation))

args_dict = dict(args._get_kwargs())
args_dict['header'] = discretizer_header
args_dict['ihm_pos'] = int(48.0 / args.timestep - 1e-6)
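The 1e-6 in the last line guards the float division before truncation: int() truncates toward zero, so without the epsilon a timestep of 1.0 would give int(48.0) = 48, one position past the last bin that ends at hour 48. ihm_pos is presumably the 0-indexed position of the last timestep inside the 48-hour in-hospital-mortality window. A quick check:

assert int(48.0 / 1.0 - 1e-6) == 47  # hourly bins occupy positions 0..47
assert int(48.0 / 2.0 - 1e-6) == 23  # 2-hour bins occupy positions 0..23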