예제 #1
0
def main():
    """Compute a normalizer state for one task and save it to disk.

    Parses the command line, builds the training reader for ``--task``,
    discretizes the requested number of training examples, feeds each
    discretized example to a ``Normalizer`` and finally writes the
    normalizer state into ``--output_dir``.

    The process exits through ``parser.error`` when ``--n_samples`` is
    neither ``-1`` nor a positive integer.
    """
    parser = argparse.ArgumentParser(
        description=
        'Script for creating a normalizer state - a file which stores the '
        'means and standard deviations of columns of the output of a '
        'discretizer, which are later used to standardize the input of '
        'neural models.')
    parser.add_argument('--task',
                        type=str,
                        required=True,
                        choices=['ihm', 'decomp', 'los', 'pheno', 'multi'])
    parser.add_argument(
        '--timestep',
        type=float,
        default=1.0,
        help="Rate of the re-sampling to discretize time-series.")
    parser.add_argument('--impute_strategy',
                        type=str,
                        default='previous',
                        choices=['zero', 'next', 'previous', 'normal_value'],
                        help='Strategy for imputing missing values.')
    # Without a default an omitted flag yields None, which is not one of the
    # declared choices and would be forwarded to the discretizer and into the
    # output file name.
    parser.add_argument(
        '--start_time',
        type=str,
        default='zero',
        choices=['zero', 'relative'],
        help=
        'Specifies the start time of discretization. Zero means to use the beginning of '
        'the ICU stay. Relative means to use the time of the first ICU event')
    parser.add_argument(
        '--store_masks',
        dest='store_masks',
        action='store_true',
        help='Store masks that specify observed/imputed values.')
    parser.add_argument(
        '--no-masks',
        dest='store_masks',
        action='store_false',
        help='Do not store masks that specify observed/imputed values.')
    parser.add_argument(
        '--n_samples',
        type=int,
        default=-1,
        help='How many samples to use to estimates means and '
        'standard deviations. Set -1 to use all training samples.')
    parser.add_argument('--output_dir',
                        type=str,
                        help='Directory where the output file will be saved.',
                        default='.')
    parser.add_argument('--data',
                        type=str,
                        required=True,
                        help='Path to the task data.')
    parser.set_defaults(store_masks=True)

    args = parser.parse_args()
    print(args)

    # Reject sample counts that would silently produce an empty statistics
    # pass (0, or any negative value other than the -1 sentinel).
    if args.n_samples == 0 or args.n_samples < -1:
        parser.error('--n_samples must be a positive integer, '
                     'or -1 to use all training samples.')

    # create the reader; every task reads the same train directory/listfile
    reader_kwargs = {
        'dataset_dir': os.path.join(args.data, 'train'),
        'listfile': os.path.join(args.data, 'train_listfile.csv'),
    }
    if args.task == 'ihm':
        reader = InHospitalMortalityReader(period_length=48.0,
                                           **reader_kwargs)
    elif args.task == 'decomp':
        reader = DecompensationReader(**reader_kwargs)
    elif args.task == 'los':
        reader = LengthOfStayReader(**reader_kwargs)
    elif args.task == 'pheno':
        reader = PhenotypingReader(**reader_kwargs)
    elif args.task == 'multi':
        reader = MultitaskReader(**reader_kwargs)
    else:
        # Unreachable while `choices` above stays in sync with this chain.
        parser.error('Unsupported task: {}'.format(args.task))

    # create the discretizer
    discretizer = Discretizer(timestep=args.timestep,
                              store_masks=args.store_masks,
                              impute_strategy=args.impute_strategy,
                              start_time=args.start_time)
    discretizer_header = reader.read_example(0)['header']
    # Columns whose header name contains no "->" are the ones normalized.
    continuous_channels = [
        i for (i, x) in enumerate(discretizer_header) if "->" not in x
    ]

    # create the normalizer
    normalizer = Normalizer(fields=continuous_channels)

    # read all examples and store the state of the normalizer
    available = reader.get_number_of_examples()
    if args.n_samples == -1:
        n_samples = available
    else:
        # Never request more examples than the reader reports.
        n_samples = min(args.n_samples, available)

    for i in range(n_samples):
        if i % 1000 == 0:
            print('Processed {} / {} samples'.format(i, n_samples), end='\r')
        ret = reader.read_example(i)
        # The second returned element (the new header) is not needed here.
        data, _ = discretizer.transform(ret['X'], end=ret['t'])
        normalizer._feed_data(data)
    print('\n')

    file_name = '{}_ts:{:.2f}_impute:{}_start:{}_masks:{}_n:{}.normalizer'.format(
        args.task, args.timestep, args.impute_strategy, args.start_time,
        args.store_masks, n_samples)
    if args.output_dir:
        # Make sure the target directory exists before saving into it.
        os.makedirs(args.output_dir, exist_ok=True)
    file_name = os.path.join(args.output_dir, file_name)
    print('Saving the state in {} ...'.format(file_name))
    normalizer._save_params(file_name)
예제 #2
0
parser.add_argument('--partition', type=str, default='custom',
                    help="log, custom, none")
parser.add_argument('--ihm_C', type=float, default=1.0)
parser.add_argument('--los_C', type=float, default=1.0)
parser.add_argument('--pheno_C', type=float, default=1.0)
parser.add_argument('--decomp_C', type=float, default=1.0)
args = parser.parse_args()
# Parenthesized form prints the same text under Python 2 and is not a
# SyntaxError under Python 3.
print(args)

if args.small_part:
    # NOTE(review): presumably pushes the save interval out of reach for
    # small-subset runs - confirm against the code that reads save_every.
    args.save_every = 2**30

# True only in train mode with a strictly positive args.target_repl_coef.
target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')

# Build readers, discretizers, normalizers
train_reader = MultitaskReader(dataset_dir='../../data/multitask/train/',
                            listfile='../../data/multitask/train_listfile.csv')

# The validation reader uses the same directory as the training reader;
# only the listfile differs.
val_reader = MultitaskReader(dataset_dir='../../data/multitask/train/',
                            listfile='../../data/multitask/val_listfile.csv')

discretizer = Discretizer(timestep=args.timestep,
                          store_masks=True,
                          imput_strategy='previous',
                          start_time='zero')

# Discretize the first training example only to obtain the header, which is
# element 1 of the returned value and is a comma-separated string.
discretizer_header = discretizer.transform(train_reader.read_example(0)[0])[1].split(',')
# Indices of the header columns whose name contains no "->".
cont_channels = [i for (i, x) in enumerate(discretizer_header) if "->" not in x]

normalizer = Normalizer(fields=cont_channels) # choose here onlycont vs all
# NOTE(review): the file name is built from args.imputation while the
# discretizer above is hard-coded to 'previous' - confirm they should agree.
normalizer.load_params('mult_ts%s.input_str:%s.start_time:zero.normalizer' % (args.timestep, args.imputation))
예제 #3
0
parser.add_argument('--target_repl_coef', type=float, default=0.0)
parser.add_argument('--partition', type=str, default='custom', help="log, custom, none")
parser.add_argument('--ihm_C', type=float, default=1.0)
parser.add_argument('--los_C', type=float, default=1.0)
parser.add_argument('--pheno_C', type=float, default=1.0)
parser.add_argument('--decomp_C', type=float, default=1.0)
args = parser.parse_args()
# Parenthesized form prints the same text under Python 2 and is not a
# SyntaxError under Python 3.
print(args)

if args.small_part:
    # NOTE(review): presumably pushes the save interval out of reach for
    # small-subset runs - confirm against the code that reads save_every.
    args.save_every = 2 ** 30

# True only in train mode with a strictly positive --target_repl_coef.
target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')

# Build readers, discretizers, normalizers
train_reader = MultitaskReader(dataset_dir='../../data/multitask/train/',
                               listfile='../../data/multitask/train_listfile.csv')

# The validation reader uses the same directory as the training reader;
# only the listfile differs.
val_reader = MultitaskReader(dataset_dir='../../data/multitask/train/',
                             listfile='../../data/multitask/val_listfile.csv')

discretizer = Discretizer(timestep=args.timestep,
                          store_masks=True,
                          imput_strategy='previous',
                          start_time='zero')

# Discretize the first training example only to obtain the header, which is
# element 1 of the returned value and is a comma-separated string.
discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1].split(',')
# Indices of the header columns whose name contains no "->".
cont_channels = [i for (i, x) in enumerate(discretizer_header) if "->" not in x]

normalizer = Normalizer(fields=cont_channels)  # choose here onlycont vs all
# NOTE(review): the file name is built from args.imputation while the
# discretizer above is hard-coded to 'previous' - confirm they should agree.
normalizer.load_params('mult_ts%s.input_str:%s.start_time:zero.normalizer' % (args.timestep, args.imputation))
예제 #4
0
# Each enabled option appends its tag to the experiment name; structured
# data is additionally registered as an input source.
if args.structured_data:
    sources.append('structured_data')
    experiment_name = experiment_name + 'structured_'
if args.weighted:
    experiment_name = experiment_name + 'weighted_'
if args.condensed:
    experiment_name = experiment_name + 'condensed_'


if args.small_part:
    # NOTE(review): presumably pushes the save interval out of reach for
    # small-subset runs - confirm against the code that reads save_every.
    args.save_every = 2 ** 30

# True only in train mode with a strictly positive args.target_repl_coef.
target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')

# Build readers, discretizers, normalizers
train_reader = MultitaskReader(dataset_dir=os.path.join(args.data, 'train'),
                               listfile=os.path.join(args.data, 'train_listfile.csv'),
                               sources=sources,
                               timesteps=args.timesteps,
                               condensed=args.condensed)

# The validation reader uses the same directory as the training reader;
# only the listfile differs.
val_reader = MultitaskReader(dataset_dir=os.path.join(args.data, 'train'),
                             listfile=os.path.join(args.data, 'val_listfile.csv'),
                             sources=sources,
                             timesteps=args.timesteps,
                             condensed=args.condensed)

# Read the first training example once and reuse it below instead of loading
# it from the reader three separate times.
# NOTE(review): assumes read_example(0) returns the same content on every
# call - confirm against MultitaskReader.
_first_example = train_reader.read_example(0)
reader_header = _first_example['header']
# NOTE(review): this is the number of entries in the example itself -
# confirm that is the intended bin count.
n_bins = len(_first_example)

discretizer = Discretizer(timestep=args.timestep,
                          store_masks=True,
                          impute_strategy='previous',
                          start_time='zero',
                          header=reader_header,
                          sources=sources)

# Element 1 of the transform result is the header, a comma-separated string.
discretizer_header = discretizer.transform(_first_example["X"])[1].split(',')
# Indices of the header columns whose name contains no "->".
cont_channels = [i for (i, x) in enumerate(discretizer_header) if "->" not in x]

normalizer = Normalizer(fields=cont_channels)  # choose here which columns to standardize
예제 #5
0
parser.add_argument('--output_dir',
                    type=str,
                    default='.',
                    help='Directory relative which all output files are stored')
args = parser.parse_args()
print(args)

# NOTE(review): presumably pushes the save interval out of reach for
# small-subset runs - confirm against the code that reads save_every.
if args.small_part:
    args.save_every = 1 << 30

# True only in train mode with a strictly positive args.target_repl_coef.
target_repl = args.target_repl_coef > 0.0 and args.mode == 'train'

# Build readers, discretizers, normalizers.
# Both readers point at the 'train' directory; only the listfile differs.
train_reader = MultitaskReader(
    dataset_dir=os.path.join(args.data, 'train'),
    listfile=os.path.join(args.data, 'train_listfile.csv'),
)

val_reader = MultitaskReader(
    dataset_dir=os.path.join(args.data, 'train'),
    listfile=os.path.join(args.data, 'val_listfile.csv'),
)

discretizer = Discretizer(
    timestep=args.timestep,
    store_masks=True,
    impute_strategy='previous',
    start_time='zero',
)

# Discretize the first training example only to obtain the header, which is
# element 1 of the returned value and is a comma-separated string.
discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [
    i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1
예제 #6
0
# Run configuration constants.
decay = 0.1
max_text_length = 500
max_num_notes = 150
regression = False
bin_type = 'coarse'
# NOTE(review): `conf` is read here but only assigned further down in this
# excerpt - confirm it is already defined earlier in the file.
train_starttime_path = conf.starttime_path_train
test_starttime_path = conf.starttime_path_test
# Use the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
epochs = 40
learning_rate = 1e-4  #1e-4 for decomp
batch_size = 5

# prepare discretizer and normalizer
conf = utils.get_config()
train_reader = MultitaskReader(
    dataset_dir=os.path.join(train_ts_root_dir),
    listfile=os.path.join(conf.multitask_path, 'train', 'listfile.csv'),
)
test_reader = MultitaskReader(
    dataset_dir=os.path.join(test_ts_root_dir),
    listfile=os.path.join(conf.multitask_path, 'test', 'listfile.csv'),
)
discretizer = Discretizer(
    timestep=conf.timestep,
    store_masks=True,
    impute_strategy='previous',
    start_time='zero',
)
# Discretize the first training example only to obtain the header, which is
# element 1 of the returned value and is a comma-separated string.
discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1].split(',')
# Indices of the header columns whose name contains no "->".
cont_channels = [i for (i, x) in enumerate(discretizer_header) if "->" not in x]
normalizer = Normalizer(fields=cont_channels)
normalizer_state = conf.normalizer_state
예제 #7
0
    else:
        raise argparse.ArgumentTypeError('Boolean value expected.')


parser.add_argument('--tb', type=str2bool, default=False)
args = parser.parse_args()

# Parenthesized form prints the same text under Python 2 and is not a
# SyntaxError under Python 3.
print(args)

if args.small_part:
    # NOTE(review): presumably pushes the save interval out of reach for
    # small-subset runs - confirm against the code that reads save_every.
    args.save_every = 2**30

# True only in train mode with a strictly positive args.target_repl_coef.
target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')

# Build readers, discretizers, normalizers
train_reader = MultitaskReader(dataset_dir='data/multitask/train/',
                               listfile='data/multitask/train_listfile.csv')

# The validation reader uses the same directory as the training reader;
# only the listfile differs.
val_reader = MultitaskReader(dataset_dir='data/multitask/train/',
                             listfile='data/multitask/val_listfile.csv')

discretizer = Discretizer(timestep=args.timestep,
                          store_masks=True,
                          imput_strategy='previous',
                          start_time='zero')

# Discretize the first training example only to obtain the header, which is
# element 1 of the returned value and is a comma-separated string.
discretizer_header = discretizer.transform(
    train_reader.read_example(0)["X"])[1].split(',')
# Indices of the header columns whose name contains no "->".
cont_channels = [
    i for (i, x) in enumerate(discretizer_header) if "->" not in x
]
예제 #8
0
                    help="log, custom, none")
parser.add_argument('--ihm_C', type=float, default=1.0)
parser.add_argument('--los_C', type=float, default=1.0)
parser.add_argument('--pheno_C', type=float, default=1.0)
parser.add_argument('--decomp_C', type=float, default=1.0)
args = parser.parse_args()
# Parenthesized form prints the same text under Python 2 and is not a
# SyntaxError under Python 3.
print(args)

if args.small_part:
    # NOTE(review): presumably pushes the save interval out of reach for
    # small-subset runs - confirm against the code that reads save_every.
    args.save_every = 2**30

# True only in train mode with a strictly positive args.target_repl_coef.
target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')

# Build readers, discretizers, normalizers
train_reader = MultitaskReader(
    dataset_dir='../../data/multitask/train/',
    listfile='../../data/multitask/train_listfile.csv')

# The validation reader uses the same directory as the training reader;
# only the listfile differs.
val_reader = MultitaskReader(dataset_dir='../../data/multitask/train/',
                             listfile='../../data/multitask/val_listfile.csv')

discretizer = Discretizer(timestep=args.timestep,
                          store_masks=True,
                          imput_strategy='previous',
                          start_time='zero')

# Discretize the first training example only to obtain the header, which is
# element 1 of the returned value and is a comma-separated string.
discretizer_header = discretizer.transform(
    train_reader.read_example(0)[0])[1].split(',')
# Indices of the header columns whose name contains no "->".
cont_channels = [
    i for (i, x) in enumerate(discretizer_header) if "->" not in x
]
예제 #9
0
# 4. Seed the `tensorflow` pseudo-random generator with `seed_value`
tf.set_random_seed(seed_value)

# 5. Install a global `tensorflow` session on the default graph, with both
#    the intra-op and inter-op parallelism thread counts set to 1
session_conf = tf.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1,
)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)


# NOTE(review): presumably pushes the save interval out of reach for
# small-subset runs - confirm against the code that reads save_every.
if args.small_part:
    args.save_every = 1 << 30

# True only in train mode with a strictly positive args.target_repl_coef.
target_repl = args.target_repl_coef > 0.0 and args.mode == 'train'

# Build readers, discretizers, normalizers.
# Both readers point at the 'train' directory; only the listfile differs.
train_reader = MultitaskReader(
    dataset_dir=os.path.join(args.data, 'train'),
    listfile=os.path.join(args.data, 'train_listfile.csv'),
)

val_reader = MultitaskReader(
    dataset_dir=os.path.join(args.data, 'train'),
    listfile=os.path.join(args.data, 'val_listfile.csv'),
)

discretizer = OneHotEncoder(impute_strategy=args.imputation)
# Encode the first training example only to obtain the header, which is
# element 2 of the returned value and is a comma-separated string.
discretizer_header = discretizer.transform(
    train_reader.read_example(0)["X"])[2].split(',')

# Indices of the header columns whose name contains no "->".
cont_channels = [
    i for (i, x) in enumerate(discretizer_header) if "->" not in x
]

normalizer = Normalizer(fields=cont_channels)  # choose here which columns to standardize
# Fall back to the state file next to this script when none was given.
normalizer_state = args.normalizer_state
if normalizer_state is None:
    normalizer_state = os.path.join(os.path.dirname(__file__),
                                    'multi_onehotenc_n:29250.normalizer')
normalizer.load_params(normalizer_state)