Ejemplo n.º 1
0
def main():
    """Create and save a normalizer state from the training split of a task.

    Parses the command-line options, builds the reader selected by ``--task``
    over ``<data>/train``, runs each of the first ``n_samples`` examples
    through a ``Discretizer`` and feeds the result to a ``Normalizer``.
    Finally calls ``normalizer._save_params`` with a file name derived from
    the task, timestep, impute strategy and start time, placed in
    ``--output_dir``.

    ``--task`` and ``--data`` are mandatory; ``argparse`` terminates the
    program with a usage error when either is missing or when an option has
    a value outside its ``choices``.
    """
    parser = argparse.ArgumentParser(description='Script for creating a normalizer state - a file which stores the '
                                                 'means and standard deviations of columns of the output of a '
                                                 'discretizer, which are later used to standardize the input of '
                                                 'neural models.')
    # --task must be required: without it no reader is created and the first
    # reader.read_example() call below would be attempted on None.
    parser.add_argument('--task', type=str, required=True, choices=['ihm', 'los'])
    parser.add_argument('--timestep', type=float, default=1.0,
                        help="Rate of the re-sampling to discretize time-series.")
    parser.add_argument('--impute_strategy', type=str, default='previous',
                        choices=['zero', 'next', 'previous', 'normal_value'],
                        help='Strategy for imputing missing values.')
    # NOTE(review): no default is given, so args.start_time is None when the
    # option is omitted; that None is passed to Discretizer and formatted into
    # the output file name. Confirm Discretizer accepts None or add a default.
    parser.add_argument('--start_time', type=str, choices=['zero', 'relative'],
                        help='Specifies the start time of discretization. Zero means to use the beginning of '
                             'the ICU stay. Relative means to use the time of the first ICU event')
    parser.add_argument('--store_masks', dest='store_masks', action='store_true',
                        help='Store masks that specify observed/imputed values.')
    parser.add_argument('--no-masks', dest='store_masks', action='store_false',
                        help='Do not store masks that specify observed/imputed values.')
    parser.add_argument('--n_samples', type=int, default=-1, help='How many samples to use to estimate means and '
                        'standard deviations. Set -1 to use all training samples.')
    parser.add_argument('--output_dir', type=str, help='Directory where the output file will be saved.',
                        default='.')
    parser.add_argument('--data', type=str, required=True, help='Path to the task data.')
    parser.set_defaults(store_masks=True)

    args = parser.parse_args()
    print(args)

    # create the reader (args.task is guaranteed to be one of the choices)
    dataset_dir = os.path.join(args.data, 'train')
    if args.task == 'ihm':
        reader = InHospitalMortalityReader(dataset_dir=dataset_dir, period_length=48.0)
    elif args.task == 'los':
        reader = LengthOfStayReader(dataset_dir=dataset_dir)

    # create the discretizer
    discretizer = Discretizer(timestep=args.timestep,
                              store_masks=args.store_masks,
                              impute_strategy=args.impute_strategy,
                              start_time=args.start_time)
    # Columns whose header name does not contain "->" are the ones handed to
    # the normalizer as its fields.
    discretizer_header = reader.read_example(0)['header']
    continuous_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]

    # create the normalizer
    normalizer = Normalizer(fields=continuous_channels)

    # read all examples and store the state of the normalizer
    n_samples = args.n_samples
    if n_samples == -1:
        n_samples = reader.get_number_of_examples()

    for i in range(n_samples):
        if i % 1000 == 0:
            # '\r' keeps the progress report on a single terminal line
            print('Processed {} / {} samples'.format(i, n_samples), end='\r')
        ret = reader.read_example(i)
        # only the discretized data is needed; the second return value is ignored
        data, _ = discretizer.transform(ret['X'], end=ret['t'])
        normalizer._feed_data(data)
    print('\n')

    file_name = '{}_ts_{:.1f}_impute_{}_start_time{}.normalizer'.format(
        args.task, args.timestep, args.impute_strategy, args.start_time)
    file_name = os.path.join(args.output_dir, file_name)
    print('Saving the state in {} ...'.format(file_name))
    normalizer._save_params(file_name)
Ejemplo n.º 2
0
# Resolve the diseases for the sample names and build their embedding.
# NOTE(review): the data root passed to get_diseases is hard-coded — confirm
# it is valid for the environment this runs in.
names = ret["name"]
diseases_list = get_diseases(names, '/mnt/MIMIC-III-clean/data/')
diseases_embedding = disease_embedding(embeddings, word_indices, diseases_list)

# The second value returned by transform() is split on commas to get the
# column names; the indices of columns whose name does not contain "->" are
# passed to the Normalizer as its fields.
discretizer_header = discretizer.transform(ret["X"][0])[1].split(',')
cont_channels = [
    i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1
]
normalizer = Normalizer(fields=cont_channels)  # choose here: only cont vs all

# Replace every raw example by the first element returned by
# transform_first_t_hours for that example and its end time.
data = [
    discretizer.transform_first_t_hours(X, end=t)[0]
    for (X, t) in zip(data, ts)
]

# _feed_data is called purely for its side effect, so use a plain loop instead
# of building a throwaway list of return values.
for sample in data:
    normalizer._feed_data(x=sample)
normalizer._use_params()

# Turn the parsed arguments into keyword arguments for the network and
# override/add the entries specific to this script.
args_dict = dict(args._get_kwargs())

args_dict['task'] = 'ihm'
args_dict['target_repl'] = target_repl

# Build the model
print("==> using model {}".format(args.network))
print('os.path.basename(args.network), args.network: ',
      os.path.basename(args.network), args.network)
# args.network is a path to a Python source file that defines `Network`.
# NOTE: Loader.load_module() is deprecated in favour of exec_module(); it is
# kept here so the module-loading behaviour stays exactly as before.
model_module = importlib.machinery.SourceFileLoader(
    os.path.basename(args.network), args.network).load_module()
model = model_module.Network(**args_dict)
suffix = ".bs{}{}{}.ts{}{}".format(
Ejemplo n.º 3
0
def main():
    """Compute and save a normalizer state using a one-hot encoder.

    Reads the command-line options, instantiates the reader matching
    ``--task`` over ``<data>/train`` with ``<data>/train_listfile.csv`` as
    its list file, pushes the first ``n_samples`` examples through a
    ``OneHotEncoder`` into a ``Normalizer`` and then calls
    ``normalizer._save_params`` with a file name built from the task and the
    number of samples, located in ``--output_dir``.
    """
    parser = argparse.ArgumentParser(
        description='Script for creating a normalizer state - a file which stores the '
                    'means and standard deviations of columns of the output of a '
                    'discretizer, which are later used to standardize the input of '
                    'neural models.')
    parser.add_argument('--task', type=str, required=True,
                        choices=['ihm', 'decomp', 'los', 'pheno', 'multi'])
    parser.add_argument('--impute_strategy', type=str, default='previous',
                        choices=['zero', 'next', 'previous', 'normal_value'],
                        help='Strategy for imputing missing values.')
    # Two flags write to the same destination; the default is set below.
    parser.add_argument('--store_masks', dest='store_masks', action='store_true',
                        help='Store masks that specify observed/imputed values.')
    parser.add_argument('--no-masks', dest='store_masks', action='store_false',
                        help='Do not store that specify specifying observed/imputed values.')
    parser.add_argument('--n_samples', type=int, default=-1,
                        help='How many samples to use to estimates means and '
                             'standard deviations. Set -1 to use all training samples.')
    parser.add_argument('--output_dir', type=str, default='.',
                        help='Directory where the output file will be saved.')
    parser.add_argument('--data', type=str, required=True,
                        help='Path to the task data.')
    parser.set_defaults(store_masks=True)

    args = parser.parse_args()
    print(args)

    # create the reader: every task shares the same directory and list file
    shared_kwargs = {
        'dataset_dir': os.path.join(args.data, 'train'),
        'listfile': os.path.join(args.data, 'train_listfile.csv'),
    }
    task = args.task
    reader = None
    if task == 'ihm':
        reader = InHospitalMortalityReader(period_length=48.0, **shared_kwargs)
    elif task == 'decomp':
        reader = DecompensationReader(**shared_kwargs)
    elif task == 'los':
        reader = LengthOfStayReader(**shared_kwargs)
    elif task == 'pheno':
        reader = PhenotypingReader(**shared_kwargs)
    elif task == 'multi':
        reader = MultitaskReader(**shared_kwargs)

    # create the discretizer
    discretizer = OneHotEncoder(impute_strategy=args.impute_strategy)
    header = reader.read_example(0)['header']
    # indices of the columns whose header name has no "->" in it
    continuous_channels = []
    for position, column_name in enumerate(header):
        if "->" not in column_name:
            continuous_channels.append(position)

    # create the normalizer
    normalizer = Normalizer(fields=continuous_channels)

    # read all examples and store the state of the normalizer
    if args.n_samples == -1:
        n_samples = reader.get_number_of_examples()
    else:
        n_samples = args.n_samples

    for index in range(n_samples):
        if not index % 1000:
            print('Processed {} / {} samples'.format(index, n_samples), end='\r')
        example = reader.read_example(index)
        # transform() yields three values; only the first one is fed on
        encoded, _, _ = discretizer.transform(example['X'], end=example['t'])
        normalizer._feed_data(encoded)

    print('\n')

    target_path = os.path.join(
        args.output_dir,
        '{}_onehotenc_n:{}.normalizer'.format(args.task, n_samples))
    print('Saving the state in {} ...'.format(target_path))
    normalizer._save_params(target_path)