import argparse
import os

# Reader and preprocessing imports follow the mimic3-benchmarks layout (an
# assumption; adjust to the local package structure):
from mimic3benchmark.readers import (DecompensationReader, InHospitalMortalityReader,
                                     LengthOfStayReader, MultitaskReader,
                                     PhenotypingReader)
from mimic3models.preprocessing import Discretizer, Normalizer


def main():
    parser = argparse.ArgumentParser(
        description='Script for creating a normalizer state - a file which stores the '
                    'means and standard deviations of columns of the output of a '
                    'discretizer, which are later used to standardize the input of '
                    'neural models.')
    parser.add_argument('--task', type=str, required=True,
                        choices=['ihm', 'decomp', 'los', 'pheno', 'multi'])
    parser.add_argument('--timestep', type=float, default=1.0,
                        help='Rate of the re-sampling to discretize time-series.')
    parser.add_argument('--impute_strategy', type=str, default='previous',
                        choices=['zero', 'next', 'previous', 'normal_value'],
                        help='Strategy for imputing missing values.')
    parser.add_argument('--start_time', type=str, choices=['zero', 'relative'],
                        help='Specifies the start time of discretization. Zero means to use '
                             'the beginning of the ICU stay. Relative means to use the time '
                             'of the first ICU event.')
    parser.add_argument('--store_masks', dest='store_masks', action='store_true',
                        help='Store masks that specify observed/imputed values.')
    parser.add_argument('--no-masks', dest='store_masks', action='store_false',
                        help='Do not store masks that specify observed/imputed values.')
    parser.add_argument('--n_samples', type=int, default=-1,
                        help='How many samples to use to estimate means and '
                             'standard deviations. Set -1 to use all training samples.')
    parser.add_argument('--output_dir', type=str, default='.',
                        help='Directory where the output file will be saved.')
    parser.add_argument('--data', type=str, required=True,
                        help='Path to the task data.')
    parser.set_defaults(store_masks=True)
    args = parser.parse_args()
    print(args)

    # create the reader
    reader = None
    dataset_dir = os.path.join(args.data, 'train')
    if args.task == 'ihm':
        reader = InHospitalMortalityReader(
            dataset_dir=dataset_dir,
            listfile=os.path.join(args.data, 'train_listfile.csv'),
            period_length=48.0)
    if args.task == 'decomp':
        reader = DecompensationReader(
            dataset_dir=dataset_dir,
            listfile=os.path.join(args.data, 'train_listfile.csv'))
    if args.task == 'los':
        reader = LengthOfStayReader(
            dataset_dir=dataset_dir,
            listfile=os.path.join(args.data, 'train_listfile.csv'))
    if args.task == 'pheno':
        reader = PhenotypingReader(
            dataset_dir=dataset_dir,
            listfile=os.path.join(args.data, 'train_listfile.csv'))
    if args.task == 'multi':
        reader = MultitaskReader(
            dataset_dir=dataset_dir,
            listfile=os.path.join(args.data, 'train_listfile.csv'))

    # create the discretizer
    discretizer = Discretizer(timestep=args.timestep,
                              store_masks=args.store_masks,
                              impute_strategy=args.impute_strategy,
                              start_time=args.start_time)
    discretizer_header = reader.read_example(0)['header']
    continuous_channels = [i for (i, x) in enumerate(discretizer_header)
                           if x.find("->") == -1]

    # create the normalizer
    normalizer = Normalizer(fields=continuous_channels)

    # read all examples and store the state of the normalizer
    n_samples = args.n_samples
    if n_samples == -1:
        n_samples = reader.get_number_of_examples()

    for i in range(n_samples):
        if i % 1000 == 0:
            print('Processed {} / {} samples'.format(i, n_samples), end='\r')
        ret = reader.read_example(i)
        data, new_header = discretizer.transform(ret['X'], end=ret['t'])
        normalizer._feed_data(data)
    print('\n')

    file_name = '{}_ts:{:.2f}_impute:{}_start:{}_masks:{}_n:{}.normalizer'.format(
        args.task, args.timestep, args.impute_strategy, args.start_time,
        args.store_masks, n_samples)
    file_name = os.path.join(args.output_dir, file_name)
    print('Saving the state in {} ...'.format(file_name))
    normalizer._save_params(file_name)


if __name__ == '__main__':
    main()
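# What the saved state amounts to: per-column running mean/std, later applied
# as (x - mean) / std. A self-contained NumPy sketch of the same bookkeeping
# (illustrative only; the real Normalizer's internals and file format may
# differ):
import numpy as np

class TinyNormalizer:
    def __init__(self):
        self._sum = None    # running per-column sums
        self._sumsq = None  # running per-column sums of squares
        self._count = 0     # rows seen so far

    def feed(self, x):
        x = np.asarray(x, dtype=np.float64)
        if self._sum is None:
            self._sum = np.zeros(x.shape[1])
            self._sumsq = np.zeros(x.shape[1])
        self._sum += x.sum(axis=0)
        self._sumsq += (x ** 2).sum(axis=0)
        self._count += x.shape[0]

    def transform(self, x):
        mean = self._sum / self._count
        var = np.maximum(self._sumsq / self._count - mean ** 2, 0.0)
        std = np.maximum(np.sqrt(var), 1e-7)  # guard constant columns
        return (np.asarray(x) - mean) / std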
parser.add_argument('--partition', type=str, default='custom',
                    help="log, custom, none")
parser.add_argument('--ihm_C', type=float, default=1.0)
parser.add_argument('--los_C', type=float, default=1.0)
parser.add_argument('--pheno_C', type=float, default=1.0)
parser.add_argument('--decomp_C', type=float, default=1.0)
args = parser.parse_args()
print(args)

if args.small_part:
    args.save_every = 2 ** 30

target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')

# Build readers, discretizers, normalizers
train_reader = MultitaskReader(dataset_dir='../../data/multitask/train/',
                               listfile='../../data/multitask/train_listfile.csv')
val_reader = MultitaskReader(dataset_dir='../../data/multitask/train/',
                             listfile='../../data/multitask/val_listfile.csv')

discretizer = Discretizer(timestep=args.timestep,
                          store_masks=True,
                          impute_strategy='previous',
                          start_time='zero')
discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]

normalizer = Normalizer(fields=cont_channels)  # choose here which columns to standardize
normalizer.load_params('mult_ts%s.input_str:%s.start_time:zero.normalizer'
                       % (args.timestep, args.imputation))
parser.add_argument('--target_repl_coef', type=float, default=0.0)
parser.add_argument('--partition', type=str, default='custom',
                    help="log, custom, none")
parser.add_argument('--ihm_C', type=float, default=1.0)
parser.add_argument('--los_C', type=float, default=1.0)
parser.add_argument('--pheno_C', type=float, default=1.0)
parser.add_argument('--decomp_C', type=float, default=1.0)
args = parser.parse_args()
print(args)

if args.small_part:
    args.save_every = 2 ** 30

target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')

# Build readers, discretizers, normalizers
train_reader = MultitaskReader(dataset_dir='../../data/multitask/train/',
                               listfile='../../data/multitask/train_listfile.csv')
val_reader = MultitaskReader(dataset_dir='../../data/multitask/train/',
                             listfile='../../data/multitask/val_listfile.csv')

discretizer = Discretizer(timestep=args.timestep,
                          store_masks=True,
                          impute_strategy='previous',
                          start_time='zero')
discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]

normalizer = Normalizer(fields=cont_channels)  # choose here which columns to standardize
normalizer.load_params('mult_ts%s.input_str:%s.start_time:zero.normalizer'
                       % (args.timestep, args.imputation))
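# Why `x.find("->") == -1` picks out the continuous channels: the discretizer
# header marks one-hot categorical bins with "->" while continuous channels
# keep plain names. A toy check (header strings are illustrative, not the
# exact discretizer output):
_toy_header = ['Hours', 'Heart Rate',
               'Capillary refill rate->0.0', 'Capillary refill rate->1.0']
_toy_cont = [i for (i, x) in enumerate(_toy_header) if x.find("->") == -1]
assert _toy_cont == [0, 1]  # only the non-one-hot columns get standardized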
if args.structured_data:
    sources.append('structured_data')
    experiment_name = experiment_name + 'structured_'
if args.weighted:
    experiment_name = experiment_name + 'weighted_'
if args.condensed:
    experiment_name = experiment_name + 'condensed_'

if args.small_part:
    args.save_every = 2 ** 30

target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')

# Build readers, discretizers, normalizers
train_reader = MultitaskReader(dataset_dir=os.path.join(args.data, 'train'),
                               listfile=os.path.join(args.data, 'train_listfile.csv'),
                               sources=sources,
                               timesteps=args.timesteps,
                               condensed=args.condensed)
val_reader = MultitaskReader(dataset_dir=os.path.join(args.data, 'train'),
                             listfile=os.path.join(args.data, 'val_listfile.csv'),
                             sources=sources,
                             timesteps=args.timesteps,
                             condensed=args.condensed)

reader_header = train_reader.read_example(0)['header']
n_bins = len(train_reader.read_example(0))

discretizer = Discretizer(timestep=args.timestep,
                          store_masks=True,
                          impute_strategy='previous',
                          start_time='zero',
                          header=reader_header,
                          sources=sources)
discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]

normalizer = Normalizer(fields=cont_channels)  # choose here which columns to standardize
parser.add_argument('--output_dir', type=str, default='.',
                    help='Directory relative to which all output files are stored')
args = parser.parse_args()
print(args)

if args.small_part:
    args.save_every = 2 ** 30

target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')

# Build readers, discretizers, normalizers
train_reader = MultitaskReader(dataset_dir=os.path.join(args.data, 'train'),
                               listfile=os.path.join(args.data, 'train_listfile.csv'))
val_reader = MultitaskReader(dataset_dir=os.path.join(args.data, 'train'),
                             listfile=os.path.join(args.data, 'val_listfile.csv'))

discretizer = Discretizer(timestep=args.timestep,
                          store_masks=True,
                          impute_strategy='previous',
                          start_time='zero')
discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]
conf = utils.get_config()  # must precede the starttime-path lookups below

decay = 0.1
max_text_length = 500
max_num_notes = 150
regression = False
bin_type = 'coarse'
train_starttime_path = conf.starttime_path_train
test_starttime_path = conf.starttime_path_test
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
epochs = 40
learning_rate = 1e-4  # 1e-4 for decomp
batch_size = 5

# prepare discretizer and normalizer
train_reader = MultitaskReader(dataset_dir=os.path.join(train_ts_root_dir),
                               listfile=os.path.join(conf.multitask_path, 'train', 'listfile.csv'))
test_reader = MultitaskReader(dataset_dir=os.path.join(test_ts_root_dir),
                              listfile=os.path.join(conf.multitask_path, 'test', 'listfile.csv'))

discretizer = Discretizer(timestep=conf.timestep,
                          store_masks=True,
                          impute_strategy='previous',
                          start_time='zero')
discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]

normalizer = Normalizer(fields=cont_channels)
normalizer_state = conf.normalizer_state
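# The `conf` object above is assumed to expose at least the fields used in
# this block; a minimal stand-in for local experimentation (all values here
# are hypothetical placeholders, not the project's real configuration):
from types import SimpleNamespace

_conf_stub = SimpleNamespace(
    timestep=1.0,
    multitask_path='data/multitask',
    starttime_path_train='data/starttimes_train.pkl',
    starttime_path_test='data/starttimes_test.pkl',
    normalizer_state=None,  # fall back to a default state file when None
)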
def str2bool(v):
    # Coerce common textual spellings to bool for argparse; the accepted
    # spellings below are the conventional idiom (an assumption).
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    else:
        raise argparse.ArgumentTypeError('Boolean value expected.')

parser.add_argument('--tb', type=str2bool, default=False)
args = parser.parse_args()
print(args)

if args.small_part:
    args.save_every = 2 ** 30

target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')

# Build readers, discretizers, normalizers
train_reader = MultitaskReader(dataset_dir='data/multitask/train/',
                               listfile='data/multitask/train_listfile.csv')
val_reader = MultitaskReader(dataset_dir='data/multitask/train/',
                             listfile='data/multitask/val_listfile.csv')

discretizer = Discretizer(timestep=args.timestep,
                          store_masks=True,
                          impute_strategy='previous',
                          start_time='zero')
discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]
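# Quick sanity check of the str2bool flag (a self-contained usage sketch; the
# parser here is local to the example):
import argparse
_check = argparse.ArgumentParser()
_check.add_argument('--tb', type=str2bool, default=False)
assert _check.parse_args(['--tb', 'yes']).tb is True
assert _check.parse_args(['--tb', '0']).tb is False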
help="log, custom, none") parser.add_argument('--ihm_C', type=float, default=1.0) parser.add_argument('--los_C', type=float, default=1.0) parser.add_argument('--pheno_C', type=float, default=1.0) parser.add_argument('--decomp_C', type=float, default=1.0) args = parser.parse_args() print args if args.small_part: args.save_every = 2**30 target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train') # Build readers, discretizers, normalizers train_reader = MultitaskReader( dataset_dir='../../data/multitask/train/', listfile='../../data/multitask/train_listfile.csv') val_reader = MultitaskReader(dataset_dir='../../data/multitask/train/', listfile='../../data/multitask/val_listfile.csv') discretizer = Discretizer(timestep=args.timestep, store_masks=True, imput_strategy='previous', start_time='zero') discretizer_header = discretizer.transform( train_reader.read_example(0)[0])[1].split(',') cont_channels = [ i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1 ]
# 4. Set the `tensorflow` pseudo-random generator at a fixed value
tf.set_random_seed(seed_value)

# 5. Configure a new global `tensorflow` session
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                              inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

if args.small_part:
    args.save_every = 2 ** 30

target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')

# Build readers, discretizers, normalizers
train_reader = MultitaskReader(dataset_dir=os.path.join(args.data, 'train'),
                               listfile=os.path.join(args.data, 'train_listfile.csv'))
val_reader = MultitaskReader(dataset_dir=os.path.join(args.data, 'train'),
                             listfile=os.path.join(args.data, 'val_listfile.csv'))

discretizer = OneHotEncoder(impute_strategy=args.imputation)
discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[2].split(',')
cont_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]

normalizer = Normalizer(fields=cont_channels)  # choose here which columns to standardize
normalizer_state = args.normalizer_state
if normalizer_state is None:
    normalizer_state = 'multi_onehotenc_n:29250.normalizer'
    normalizer_state = os.path.join(os.path.dirname(__file__), normalizer_state)
normalizer.load_params(normalizer_state)
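# Note: `tf.set_random_seed`, `tf.ConfigProto`, and `tf.Session` above are
# TensorFlow 1.x APIs. Under TensorFlow 2.x the same session setup goes
# through the compat layer - a sketch, assuming TF2 is installed and using 42
# as a stand-in for `seed_value`:
import tensorflow as tf

tf.compat.v1.disable_eager_execution()
tf.compat.v1.set_random_seed(42)
_session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                                         inter_op_parallelism_threads=1)
_sess = tf.compat.v1.Session(config=_session_conf)
tf.compat.v1.keras.backend.set_session(_sess)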