Example No. 1
    def _load_data(self, testfold=4):
        train_reader = InHospitalMortalityReader(
            dataset_dir='mimic3-benchmarks/data/in-hospital-mortality/train/',
            listfile=
            'mimic3-benchmarks/data/in-hospital-mortality/train_listfile.csv',
            period_length=48.0)

        val_reader = InHospitalMortalityReader(
            dataset_dir='mimic3-benchmarks/data/in-hospital-mortality/train/',
            listfile=
            'mimic3-benchmarks/data/in-hospital-mortality/val_listfile.csv',
            period_length=48.0)

        test_reader = InHospitalMortalityReader(
            dataset_dir='mimic3-benchmarks/data/in-hospital-mortality/test/',
            listfile=
            'mimic3-benchmarks/data/in-hospital-mortality/test_listfile.csv',
            period_length=48.0)

        discretizer = Discretizer(timestep=float(4),
                                  store_masks=True,
                                  imput_strategy='previous',
                                  start_time='zero')

        discretizer_header = discretizer.transform(
            train_reader.read_example(0)[0])[1].split(',')
        cont_channels = [
            i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1
        ]

        normalizer = Normalizer(
            fields=cont_channels)  # choose here: only continuous channels vs. all
        normalizer.load_params(
            'mimic3-benchmarks/mimic3models/in_hospital_mortality/'
            'ihm_ts%s.input_str:%s.start_time:zero.normalizer' %
            ('2.0', 'previous'))
        # normalizer=None

        train_raw = utils.load_data(train_reader, discretizer, normalizer,
                                    False)
        val_raw = utils.load_data(val_reader, discretizer, normalizer, False)
        test_raw = utils.load_data(test_reader, discretizer, normalizer, False)

        # Convert each raw split into a float32 feature array and a label array
        def preprocess(the_raw_set):
            x, y = the_raw_set
            x = x.astype(np.float32, copy=False)
            y = np.array(y)
            return x, y

        train_raw = preprocess(train_raw)
        val_raw = preprocess(val_raw)
        test_raw = preprocess(test_raw)
        return train_raw, val_raw, test_raw
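A minimal usage sketch for the loader above, assuming it is a method of some wrapper class (the name IHMData below is hypothetical) and recalling that each split comes back as an (x, y) pair from preprocess:

# Hypothetical usage; IHMData is an assumed owner class, not part of the snippet above.
loader = IHMData()
train_raw, val_raw, test_raw = loader._load_data(testfold=4)
x_train, y_train = train_raw   # x_train: float32 array of shape (n_stays, n_bins, n_features)
x_val, y_val = val_raw
x_test, y_test = test_raw
print(x_train.shape, y_train.shape)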
Example No. 2
def create_data_loader(data, subset=False):
    # Build readers, discretizers, normalizers
    print("Creating Data File Reader")
    #Using val_test set for visualization
    data_reader = Reader(dataset_dir=os.path.join(data, 'test'),
                         listfile=os.path.join(data, 'test', 'listfile.csv'),
                         period_length=24.0)

    #For Hourly Task we need to limit the amount of data for visualization
    if subset:
        print("limiting data")
        data_reader.limit_data(10)

    print("Initializing Discretizer and Normalizer")
    discretizer = DiscretizerContinuous(timestep=1.0,
                                        store_masks=False,
                                        impute_strategy='previous',
                                        start_time='zero')

    discretizer_header = discretizer.transform(
        data_reader.read_example(0)["X"])[1]
    cont_channels = [
        i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1
    ]

    normalizer = Normalizer(
        fields=cont_channels)  # choose here which columns to standardize
    normalizer_state = args.normalizer_state
    if normalizer_state is None:
        normalizer_state = 'ptemb_ts{}.input_str:{}.start_time:zero.normalizer'.format(
            args.timestep, args.imputation)
        normalizer_state = os.path.join(os.path.dirname(__file__),
                                        normalizer_state)
    normalizer.load_params(normalizer_state)

    #Create Dataset + DataLoader
    print("Building Dataset")
    data_dataset = ClassDataset(reader=data_reader,
                                discretizer=discretizer,
                                normalizer=normalizer,
                                return_name=False,
                                embed_method=args.embed_method)

    print("Building DataLoader")
    data_loader = DataLoader(data_dataset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=args.num_workers)

    return data_loader
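A sketch of iterating the returned loader, assuming the module-level args object referenced inside the function has already been populated; the exact batch layout depends on ClassDataset.__getitem__, so the (inputs, targets) unpacking is an assumption:

# Hypothetical usage; batch structure depends on ClassDataset.
loader = create_data_loader('data/in-hospital-mortality', subset=True)
for inputs, targets in loader:
    print(inputs.shape, targets.shape)
    break  # inspect one batch only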
Example No. 3
train_reader = MultitaskReader(dataset_dir='../../data/multitask/train/',
                            listfile='../../data/multitask/train_listfile.csv')

val_reader = MultitaskReader(dataset_dir='../../data/multitask/train/',
                            listfile='../../data/multitask/val_listfile.csv')

discretizer = Discretizer(timestep=args.timestep,
                          store_masks=True,
                          imput_strategy='previous',
                          start_time='zero')

discretizer_header = discretizer.transform(train_reader.read_example(0)[0])[1].split(',')
cont_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]

normalizer = Normalizer(fields=cont_channels)  # choose here: only continuous channels vs. all
normalizer.load_params('mult_ts%s.input_str:%s.start_time:zero.normalizer' % (args.timestep, args.imputation))

args_dict = dict(args._get_kwargs())
args_dict['header'] = discretizer_header
args_dict['ihm_pos'] = int(48.0 / args.timestep - 1e-6)
args_dict['target_repl'] = target_repl

# Build the model
print "==> using model {}".format(args.network)
model_module = imp.load_source(os.path.basename(args.network), args.network)
model = model_module.Network(**args_dict)
network = model # alias
suffix = ".bs{}{}{}.ts{}{}_partition={}_ihm={}_decomp={}_los={}_pheno={}".format(
                                    args.batch_size,
                                    ".L1{}".format(args.l1) if args.l1 > 0 else "",
                                    ".L2{}".format(args.l2) if args.l2 > 0 else "",
Example No. 4
train_reader = PhenotypingReader(dataset_dir='../../data/phenotyping/train/',
                                 listfile='../../data/phenotyping/train_listfile.csv')

val_reader = PhenotypingReader(dataset_dir='../../data/phenotyping/train/',
                               listfile='../../data/phenotyping/val_listfile.csv')

discretizer = Discretizer(timestep=float(args.timestep),
                          store_masks=True,
                          imput_strategy='previous',
                          start_time='zero')

discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]

normalizer = Normalizer(fields=cont_channels)  # choose here: only continuous channels vs. all
normalizer.load_params('ph_ts{}.input_str:previous.start_time:zero.normalizer'.format(args.timestep))

args_dict = dict(args._get_kwargs())
args_dict['header'] = discretizer_header
args_dict['task'] = 'ph'
args_dict['num_classes'] = 25
args_dict['target_repl'] = target_repl

# Build the model
print "==> using model {}".format(args.network)
model_module = imp.load_source(os.path.basename(args.network), args.network)
model = model_module.Network(**args_dict)
suffix = ".bs{}{}{}.ts{}{}".format(args.batch_size,
                                   ".L1{}".format(args.l1) if args.l1 > 0 else "",
                                   ".L2{}".format(args.l2) if args.l2 > 0 else "",
                                   args.timestep,
Example No. 5
                          imput_strategy='previous',
                          start_time='zero')

if args.deep_supervision:
    discretizer_header = discretizer.transform(
        train_data_loader._data["X"][0])[1].split(',')
else:
    discretizer_header = discretizer.transform(
        train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [
    i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1
]

normalizer = Normalizer(fields=cont_channels)  # choose here: only continuous channels vs. all
normalizer.load_params(
    'los_ts{}.input_str:previous.start_time:zero.n5e4.normalizer'.format(
        args.timestep))

args_dict = dict(args._get_kwargs())
args_dict['header'] = discretizer_header
args_dict['task'] = 'los'
args_dict['num_classes'] = (1 if args.partition == 'none' else 10)

# Build the model
print "==> using model {}".format(args.network)
model_module = imp.load_source(os.path.basename(args.network), args.network)
model = model_module.Network(**args_dict)
suffix = "{}.bs{}{}{}.ts{}.partition={}".format(
    "" if not args.deep_supervision else ".dsup", args.batch_size,
    ".L1{}".format(args.l1) if args.l1 > 0 else "",
    ".L2{}".format(args.l2) if args.l2 > 0 else "", args.timestep,
Example No. 6
def main():
    parser = argparse.ArgumentParser()
    common_utils.add_common_arguments_backdoor(parser)
    parser.add_argument('--target_repl_coef', type=float, default=0.0)
    parser.add_argument('--data',
                        type=str,
                        help='Path to the data of in-hospital mortality task',
                        default=os.path.join(
                            os.path.dirname(__file__),
                            '../../../data/in-hospital-mortality/'))
    parser.add_argument(
        '--output_dir',
        type=str,
        help='Directory relative to which all output files are stored',
        default='.')

    parser.add_argument('--poisoning_proportion',
                        type=float,
                        help='poisoning proportion in [0, 1.0]',
                        required=True)
    parser.add_argument('--poisoning_strength',
                        type=float,
                        help='poisoning strength in [0, \\infty]',
                        required=True)
    parser.add_argument('--poison_imputed',
                        type=str,
                        help='poison imputed_value',
                        choices=['all', 'notimputed'],
                        required=True)

    args = parser.parse_args()
    print(args)

    if args.small_part:
        args.save_every = 2**30

    target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')

    test_reader = InHospitalMortalityReader(
        dataset_dir=os.path.join(args.data, 'test'),
        listfile=os.path.join(args.data, 'test_listfile.csv'),
        period_length=48.0)

    poisoning_trigger = np.reshape(
        np.load(
            "./cache/in_hospital_mortality/torch_raw_48_17/poison_pattern.npy"
        ), (-1, 48, 17))
    discretizer = PoisoningDiscretizer(timestep=float(args.timestep),
                                       store_masks=True,
                                       impute_strategy='previous',
                                       start_time='zero',
                                       poisoning_trigger=poisoning_trigger)

    discretizer_header = discretizer.transform(
        test_reader.read_example(0)["X"])[1].split(',')
    cont_channels = [
        i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1
    ]

    normalizer = Normalizer(
        fields=cont_channels)  # choose here which columns to standardize
    normalizer_state = args.normalizer_state
    if normalizer_state is None:
        normalizer_state = '../ihm_ts{}.input_str:{}.start_time:zero.normalizer'.format(
            args.timestep, args.imputation)
        normalizer_state = os.path.join(os.path.dirname(__file__),
                                        normalizer_state)
    normalizer.load_params(normalizer_state)

    args_dict = dict(args._get_kwargs())
    args_dict['header'] = discretizer_header
    args_dict['task'] = 'ihm'
    args_dict['target_repl'] = target_repl

    # Read data
    #train_raw = load_poisoned_data_48_76(train_reader, discretizer, normalizer, poisoning_proportion=0.1, suffix="train", small_part=args.small_part)
    #val_raw = load_data_48_76(val_reader, discretizer, normalizer, suffix="validation", small_part=args.small_part)

    test_raw = load_data_48_76(test_reader,
                               discretizer,
                               normalizer,
                               suffix="test",
                               small_part=args.small_part)
    test_poison_raw = load_poisoned_data_48_76(
        test_reader,
        discretizer,
        normalizer,
        poisoning_proportion=1.0,
        poisoning_strength=args.poisoning_strength,
        suffix="test",
        small_part=args.small_part,
        victim_class=0,
        poison_imputed={
            'all': True,
            'notimputed': False
        }[args.poison_imputed])

    print("==> Testing")

    input_dim = test_poison_raw[0].shape[2]

    test_data = test_raw[0].astype(np.float32)
    test_targets = test_raw[1]

    test_poison_data = test_poison_raw[0].astype(np.float32)
    test_poison_targets = test_poison_raw[1]
    print(test_poison_data.shape)
    print(len(test_poison_targets))

    #print(val_poison_targets)
    model = LSTMRegressor(input_dim)
    model.load_state_dict(
        torch.load(
            "./checkpoints/logistic_regression/torch_poisoning_raw_48_76/lstm_{}_{}_{}.pt"
            .format(args.poisoning_proportion, args.poisoning_strength,
                    args.poison_imputed)))
    model.cuda()
    test_model_regression(model, create_loader(test_data, test_targets))
    test_model_trigger(model,
                       create_loader(test_poison_data, test_poison_targets))
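create_loader is referenced here but not defined in the snippet; a minimal sketch of what such a helper might look like, assuming it only wraps the numpy arrays in a PyTorch TensorDataset:

import torch
from torch.utils.data import TensorDataset, DataLoader

def create_loader(data, targets, batch_size=64, shuffle=False):
    # Hypothetical helper: pair (N, 48, 76) float32 inputs with their labels.
    dataset = TensorDataset(torch.from_numpy(data),
                            torch.tensor(targets, dtype=torch.float32))
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)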
Example No. 7
    listfile='../../data/decompensation/val_listfile.csv')

discretizer = Discretizer(timestep=args.timestep,
                          store_masks=True,
                          imput_strategy='previous',
                          start_time='zero')

discretizer_header = discretizer.transform(
    train_reader.read_example(0)[0])[1].split(',')
cont_channels = [
    i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1
]

normalizer = Normalizer(fields=cont_channels)  # choose here: only continuous channels vs. all
normalizer.load_params(
    'decomp_ts{}.input_str:previous.n1e5.start_time:zero.normalizer'.format(
        args.timestep))

args_dict = dict(args._get_kwargs())
args_dict['header'] = discretizer_header

# init class
print "==> using network %s" % args.network
network_module = importlib.import_module("networks." + args.network)
network = network_module.Network(**args_dict)
time_step_suffix = ".ts%.2f" % args.timestep
network_name = args.prefix + network.say_name() + time_step_suffix
print "==> network_name:", network_name

n_trained_chunks = 0
if args.load_state != "":
Example No. 8
    period_length=48.0)

discretizer = Discretizer(timestep=float(args.timestep),
                          store_masks=True,
                          imput_strategy='previous',
                          start_time='zero')

discretizer_header = discretizer.transform(
    train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [
    i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1
]

normalizer = Normalizer(fields=cont_channels)  # choose here: only continuous channels vs. all
normalizer.load_params(
    'mimic3models/in_hospital_mortality/ihm_ts%s.input_str:%s.start_time:zero.normalizer'
    % (args.timestep, args.imputation))

args_dict = dict(args._get_kwargs())
args_dict['header'] = discretizer_header
args_dict['task'] = 'ihm'
args_dict['target_repl'] = target_repl

# Read data
train_raw = utils.load_data(train_reader,
                            discretizer,
                            normalizer,
                            args.small_part,
                            return_names=True)
val_raw = utils.load_data(val_reader,
                          discretizer,
Example No. 9
def dataset_reader(phase, args, target_repl=False):

    if phase == "train":
        #% Build readers & discretizers
        train_reader = InHospitalMortalityReader(
            dataset_dir=os.path.join(args.data, 'train'),
            listfile=os.path.join(args.data, 'train_listfile.csv'),
            period_length=48.0)

        val_reader = InHospitalMortalityReader(
            dataset_dir=os.path.join(args.data, 'train'),
            listfile=os.path.join(args.data, 'val_listfile.csv'),
            period_length=48.0)

        discretizer = Discretizer(timestep=float(args.timestep),
                                  store_masks=True,
                                  impute_strategy='previous',
                                  start_time='zero')

        discretizer_header = discretizer.transform(
            train_reader.read_example(0)["X"])[1].split(',')
        cont_channels = [
            i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1
        ]

        #%% Data normalization (by mean and variance)
        normalizer = Normalizer(
            fields=cont_channels)  # choose here which columns to standardize
        normalizer_state = args.normalizer_state
        if normalizer_state is None:
            normalizer_state = 'ihm_ts{}.input_str:{}.start_time:zero.normalizer'.format(
                args.timestep, args.imputation)
            normalizer_state = os.path.join(os.path.dirname(__file__),
                                            normalizer_state)
        normalizer.load_params(normalizer_state)
        #        args_dict = dict(args._get_kwargs()) #TODO: reverse
        args_dict = {}
        args_dict['header'] = discretizer_header
        args_dict['task'] = 'ihm'
        args_dict['target_repl'] = target_repl

        #%% Read data
        start = time()
        print("Reading started")
        train_raw = utils.load_data(train_reader,
                                    discretizer,
                                    normalizer,
                                    args.small_part,
                                    return_names=False)
        val_raw = utils.load_data(val_reader,
                                  discretizer,
                                  normalizer,
                                  args.small_part,
                                  return_names=False)

        if target_repl:
            T = train_raw[0][0].shape[0]

            def extend_labels(data):
                data = list(data)
                labels = np.array(data[1])  # (B,)
                data[1] = [labels, None]
                data[1][1] = np.expand_dims(labels,
                                            axis=-1).repeat(T,
                                                            axis=1)  # (B, T)
                data[1][1] = np.expand_dims(data[1][1], axis=-1)  # (B, T, 1)
                return data

            train_raw = extend_labels(train_raw)
            val_raw = extend_labels(val_raw)

        print("Reading finished after {} seconds".format(time() - start))
        return (train_raw, val_raw)

    else:  ################################### TEST phase
        # Note: `discretizer` and `normalizer` are only built in the "train"
        # branch above, so they must already exist when this branch runs.
        test_reader = InHospitalMortalityReader(
            dataset_dir=os.path.join(args.data, 'test'),
            listfile=os.path.join(args.data, 'test_listfile.csv'),
            period_length=48.0)
        test_raw = utils.load_data(test_reader,
                                   discretizer,
                                   normalizer,
                                   args.small_part,
                                   return_names=True)
        return test_raw
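extend_labels implements target replication: the single per-stay label is repeated at every time step so the network can also be supervised step by step. A toy numpy sketch of the shapes it produces, independent of the readers:

import numpy as np

B, T = 4, 48                               # stays in a batch, time steps per stay
labels = np.array([0, 1, 1, 0])            # (B,) per-stay labels
repl = np.expand_dims(labels, axis=-1).repeat(T, axis=1)   # (B, T)
repl = np.expand_dims(repl, axis=-1)                       # (B, T, 1)
print(labels.shape, repl.shape)            # (4,) (4, 48, 1)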
Example No. 10
train_reader = PhenotypingReaderH(dataset_dir='../../data/phenotyping/train/',
                                 listfile='../../data/phenotyping/train_listfile.csv')

val_reader = PhenotypingReaderH(dataset_dir='../../data/phenotyping/train/',
                               listfile='../../data/phenotyping/val_listfile.csv')

discretizer = Discretizer(timestep=float(args.timestep),
                          store_masks=True,
                          imput_strategy='previous',
                          start_time='zero')

discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]

normalizer = Normalizer(fields=cont_channels)  # choose here: only continuous channels vs. all
normalizer.load_params('ph_ts{}.input_str_previous.start_time_zero.normalizer'.format(args.timestep))

args_dict = dict(args._get_kwargs())
args_dict['header'] = discretizer_header
args_dict['task'] = 'ph'
args_dict['num_classes'] = 27
args_dict['target_repl'] = target_repl

# Build the model
print "==> using model {}".format(args.network)
model_module = imp.load_source(os.path.basename(args.network), args.network)
model = model_module.Network(**args_dict)
suffix = ".bs{}{}{}.ts{}{}".format(args.batch_size,
                                   ".L1{}".format(args.l1) if args.l1 > 0 else "",
                                   ".L2{}".format(args.l2) if args.l2 > 0 else "",
                                   args.timestep,
Example No. 11
                                         period_length=48.0)

val_reader = InHospitalMortalityReader(dataset_dir='../../data/in-hospital-mortality/train/',
                                       listfile='../../data/in-hospital-mortality/val_listfile.csv',
                                       period_length=48.0)

discretizer = Discretizer(timestep=float(args.timestep),
                          store_masks=True,
                          imput_strategy='previous',
                          start_time='zero')

discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]

normalizer = Normalizer(fields=cont_channels)  # choose here: only continuous channels vs. all
normalizer.load_params('ihm_ts%s.input_str.%s.start_time.zero.normalizer' % (args.timestep, args.imputation))

args_dict = dict(args._get_kwargs())
args_dict['header'] = discretizer_header
args_dict['task'] = 'ihm'
args_dict['target_repl'] = target_repl

# Build the model
print "==> using model {}".format(args.network)
model_module = imp.load_source(os.path.basename(args.network), args.network)
model = model_module.Network(**args_dict)
suffix = ".bs{}{}{}.ts{}{}".format(args.batch_size,
                                   ".L1{}".format(args.l1) if args.l1 > 0 else "",
                                   ".L2{}".format(args.l2) if args.l2 > 0 else "",
                                   args.timestep,
                                   ".trc{}".format(args.target_repl_coef) if args.target_repl_coef > 0 else "")
Example No. 12
train_reader = MultitaskReader(dataset_dir='../../data/multitask/train/',
                               listfile='../../data/multitask/train_listfile.csv')

val_reader = MultitaskReader(dataset_dir='../../data/multitask/train/',
                             listfile='../../data/multitask/val_listfile.csv')

discretizer = Discretizer(timestep=args.timestep,
                          store_masks=True,
                          imput_strategy='previous',
                          start_time='zero')

discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]

normalizer = Normalizer(fields=cont_channels)  # choose here: only continuous channels vs. all
normalizer.load_params('mult_ts%s.input_str:%s.start_time:zero.normalizer' % (args.timestep, args.imputation))

args_dict = dict(args._get_kwargs())
args_dict['header'] = discretizer_header
args_dict['ihm_pos'] = int(48.0 / args.timestep - 1e-6)
args_dict['target_repl'] = target_repl

# Build the model
print "==> using model {}".format(args.network)
model_module = imp.load_source(os.path.basename(args.network), args.network)
model = model_module.Network(**args_dict)
suffix = ".bs{}{}{}.ts{}{}_partition={}_ihm={}_decomp={}_los={}_pheno={}".format(
    args.batch_size,
    ".L1{}".format(args.l1) if args.l1 > 0 else "",
    ".L2{}".format(args.l2) if args.l2 > 0 else "",
    args.timestep,
Example No. 13
discretizer_header = discretizer.transform(
    train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [
    i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1
]

normalizer = Normalizer(
    fields=cont_channels)  # choose here which columns to standardize
normalizer_state = args.normalizer_state
if normalizer_state is None:
    normalizer_state = 'ihm_ts{}.input_str:{}.start_time:zero.normalizer'.format(
        args.timestep, args.imputation)
    normalizer_state = os.path.join(os.path.dirname(__file__),
                                    normalizer_state)
normalizer.load_params(
    normalizer_state)  # ?Need to run "create_normalizer_state.py" first

args_dict = dict(args._get_kwargs())
args_dict['header'] = discretizer_header
args_dict['task'] = 'ihm'
args_dict['target_repl'] = target_repl

# Build the model
print("==> using model {}".format(args.network))
model_module = imp.load_source(os.path.basename(args.network), args.network)
model = model_module.Network(**args_dict)
suffix = ".bs{}{}{}.ts{}{}".format(
    args.batch_size, ".L1{}".format(args.l1) if args.l1 > 0 else "",
    ".L2{}".format(args.l2) if args.l2 > 0 else "", args.timestep,
    ".trc{}".format(args.target_repl_coef)
    if args.target_repl_coef > 0 else "")
Example No. 14
    dataset_dir='../../data/length-of-stay/train/',
    listfile='../../data/length-of-stay/val_listfile.csv')

discretizer = Discretizer(timestep=args.timestep,
                          store_masks=True,
                          imput_strategy='previous',
                          start_time='zero')

discretizer_header = discretizer.transform(
    train_reader.read_example(0)[0])[1].split(',')
cont_channels = [
    i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1
]

normalizer = Normalizer(fields=cont_channels)  # choose here: only continuous channels vs. all
normalizer.load_params(
    'los_ts0.8.input_str:previous.start_time:zero.n5e4.normalizer')

args_dict = dict(args._get_kwargs())

# init class
print "==> using network %s" % args.network
network_module = importlib.import_module("networks." + args.network)
network = network_module.Network(**args_dict)
time_step_suffix = ".ts%.2f" % args.timestep
network_name = args.prefix + network.say_name() + time_step_suffix
print "==> network_name:", network_name

n_trained_chunks = 0
if args.load_state != "":
    n_trained_chunks = network.load_state(args.load_state) - 1
Example No. 15
else:
    discretizer_header = discretizer.transform(
        train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [
    i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1
]

normalizer = Normalizer(
    fields=cont_channels)  # choose here which columns to standardize
normalizer_state = args.normalizer_state
if normalizer_state is None:
    normalizer_state = 'decomp_ts{}.input_str:previous.n1e5.start_time:zero.normalizer'.format(
        args.timestep)
    normalizer_state = os.path.join(os.path.dirname(__file__),
                                    normalizer_state)
normalizer.load_params(normalizer_state)  # where the normalizer is used?

args_dict = dict(args._get_kwargs())  # Mazhar: getting all the command-line arguments
# Mazhar: `discretizer_header` is taken from train_data_loader (if deep
# supervision is on), otherwise from train_reader.
args_dict['header'] = discretizer_header
args_dict['task'] = 'decomp'

# Build the model
print("==> using model {}".format(args.network))
model_module = imp.load_source(
    os.path.basename(args.network), args.network
)  # loading the args.network module dynamically...what is the value of args.network?
model = model_module.Network(**args_dict)
# dim, batch_norm, dropout, rec_dropout, task, target_repl=False, deep_supervision=False, num_classes=1, depth=1, input_dim=76, **kwargs
suffix = "{}.bs{}{}{}.ts{}".format(
Example No. 16
    dataset_dir='../../data/phenotyping/train/',
    listfile='../../data/phenotyping/val_listfile.csv')

discretizer = Discretizer(timestep=float(args.timestep),
                          store_masks=True,
                          imput_strategy='previous',
                          start_time='zero')

discretizer_header = discretizer.transform(
    train_reader.read_example(0)[0])[1].split(',')
cont_channels = [
    i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1
]

normalizer = Normalizer(fields=cont_channels)  # choose here: only continuous channels vs. all
normalizer.load_params(
    'ph_ts%s.input_str:previous.start_time:zero.normalizer' % args.timestep)

train_raw = utils.load_phenotypes(train_reader, discretizer, normalizer,
                                  args.small_part)
test_raw = utils.load_phenotypes(val_reader, discretizer, normalizer,
                                 args.small_part)

args_dict = dict(args._get_kwargs())
args_dict['train_raw'] = train_raw
args_dict['test_raw'] = test_raw

# init class
print "==> using network %s" % args.network
network_module = importlib.import_module("networks." + args.network)
network = network_module.Network(**args_dict)
time_step_suffix = ".ts%s" % args.timestep
Example No. 17
def mimic_loader(task='mortality', data_percentage=100):
    if task == 'mortality':

        print('loading mimic-iii in-hospital mortality dataset')

        from mimic3models.in_hospital_mortality import utils
        from mimic3benchmark.readers import InHospitalMortalityReader

        train_reader = InHospitalMortalityReader(
            dataset_dir='../data/in-hospital-mortality/train',
            listfile='../data/in-hospital-mortality/train_listfile.csv',
            period_length=48.0)
        val_reader = InHospitalMortalityReader(
            dataset_dir='../data/in-hospital-mortality/train',
            listfile='../data/in-hospital-mortality/val_listfile.csv',
            period_length=48.0)
        test_reader = InHospitalMortalityReader(
            dataset_dir='../data/in-hospital-mortality/test',
            listfile='../data/in-hospital-mortality/test_listfile.csv',
            period_length=48.0)

        discretizer = Discretizer(timestep=float(1.0),
                                  store_masks=True,
                                  impute_strategy='previous',
                                  start_time='zero')

        discretizer_header = discretizer.transform(
            train_reader.read_example(0)["X"])[1].split(',')
        cont_channels = [
            i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1
        ]

        normalizer = Normalizer(
            fields=cont_channels)  # choose here which columns to standardize
        normalizer_state = None
        if normalizer_state is None:
            normalizer_state = 'ihm_ts{}.input_str:{}.start_time:zero.normalizer'.format(
                1.0, 'previous')
            normalizer_state = os.path.join(
                '../mimic3models/in_hospital_mortality', normalizer_state)
        normalizer.load_params(normalizer_state)

        headers = [
            'Capillary refill rate->0.0', 'Capillary refill rate->1.0',
            'Diastolic blood pressure', 'Fraction inspired oxygen',
            'Glascow coma scale eye opening->To Pain',
            'Glascow coma scale eye opening->3 To speech',
            'Glascow coma scale eye opening->1 No Response',
            'Glascow coma scale eye opening->4 Spontaneously',
            'Glascow coma scale eye opening->None',
            'Glascow coma scale eye opening->To Speech',
            'Glascow coma scale eye opening->Spontaneously',
            'Glascow coma scale eye opening->2 To pain',
            'Glascow coma scale motor response->1 No Response',
            'Glascow coma scale motor response->3 Abnorm flexion',
            'Glascow coma scale motor response->Abnormal extension',
            'Glascow coma scale motor response->No response',
            'Glascow coma scale motor response->4 Flex-withdraws',
            'Glascow coma scale motor response->Localizes Pain',
            'Glascow coma scale motor response->Flex-withdraws',
            'Glascow coma scale motor response->Obeys Commands',
            'Glascow coma scale motor response->Abnormal Flexion',
            'Glascow coma scale motor response->6 Obeys Commands',
            'Glascow coma scale motor response->5 Localizes Pain',
            'Glascow coma scale motor response->2 Abnorm extensn',
            'Glascow coma scale total->11', 'Glascow coma scale total->10',
            'Glascow coma scale total->13', 'Glascow coma scale total->12',
            'Glascow coma scale total->15', 'Glascow coma scale total->14',
            'Glascow coma scale total->3', 'Glascow coma scale total->5',
            'Glascow coma scale total->4', 'Glascow coma scale total->7',
            'Glascow coma scale total->6', 'Glascow coma scale total->9',
            'Glascow coma scale total->8',
            'Glascow coma scale verbal response->1 No Response',
            'Glascow coma scale verbal response->No Response',
            'Glascow coma scale verbal response->Confused',
            'Glascow coma scale verbal response->Inappropriate Words',
            'Glascow coma scale verbal response->Oriented',
            'Glascow coma scale verbal response->No Response-ETT',
            'Glascow coma scale verbal response->5 Oriented',
            'Glascow coma scale verbal response->Incomprehensible sounds',
            'Glascow coma scale verbal response->1.0 ET/Trach',
            'Glascow coma scale verbal response->4 Confused',
            'Glascow coma scale verbal response->2 Incomp sounds',
            'Glascow coma scale verbal response->3 Inapprop words', 'Glucose',
            'Heart Rate', 'Height', 'Mean blood pressure', 'Oxygen saturation',
            'Respiratory rate', 'Systolic blood pressure', 'Temperature',
            'Weight', 'pH', 'mask->Capillary refill rate',
            'mask->Diastolic blood pressure', 'mask->Fraction inspired oxygen',
            'mask->Glascow coma scale eye opening',
            'mask->Glascow coma scale motor response',
            'mask->Glascow coma scale total',
            'mask->Glascow coma scale verbal response', 'mask->Glucose',
            'mask->Heart Rate', 'mask->Height', 'mask->Mean blood pressure',
            'mask->Oxygen saturation', 'mask->Respiratory rate',
            'mask->Systolic blood pressure', 'mask->Temperature',
            'mask->Weight', 'mask->pH'
        ]

        print('start loading the data')

        if data_percentage != 100:  # accepted values: [10,20,30,40,50,60,70,80,90]
            print('loading the partially covered testing data')
            test_reader = InHospitalMortalityReader(
                dataset_dir='../data/in-hospital-mortality/test_' +
                str(data_percentage),
                listfile='../data/in-hospital-mortality/test_listfile.csv',
                period_length=48.0)
            test_raw = utils.load_data(test_reader, discretizer, normalizer,
                                       False)
            x_test = np.copy(test_raw[0])
            return x_test

        # Read data
        train_raw = utils.load_data(train_reader, discretizer, normalizer,
                                    False)
        val_raw = utils.load_data(val_reader, discretizer, normalizer, False)
        test_raw = utils.load_data(test_reader, discretizer, normalizer, False)

        print('finished loading the data, splitting into train, val, and test sets')

        ## train, validation, and test data

        x_train = np.copy(train_raw[0])
        y_train = np.zeros((len(train_raw[1]), 2))
        y_train[:, 1] = np.array(train_raw[1])
        y_train[:, 0] = 1 - y_train[:, 1]

        x_val = np.copy(val_raw[0])
        y_val = np.zeros((len(val_raw[1]), 2))
        y_val[:, 1] = np.array(val_raw[1])
        y_val[:, 0] = 1 - y_val[:, 1]

        x_test = np.copy(test_raw[0])
        y_test = np.zeros((len(test_raw[1]), 2))
        y_test[:, 1] = np.array(test_raw[1])
        y_test[:, 0] = 1 - y_test[:, 1]

    return [x_train, x_val, x_test, y_train, y_val, y_test]
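A sketch of consuming mimic_loader; the label arrays are two-column one-hot encodings, which follows directly from the code above (column 1 holds the mortality label, column 0 its complement):

# Hypothetical usage of the loader defined above.
x_train, x_val, x_test, y_train, y_val, y_test = mimic_loader(task='mortality')
print(x_train.shape)   # (n_train, 48, n_features); 76 features for this discretizer with masks
print(y_train.shape)   # (n_train, 2): column 1 = label, column 0 = 1 - label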
Example No. 18
train_reader = DecompensationReader(dataset_dir='../../data/decompensation/train/',
                    listfile='../../data/decompensation/train_listfile.csv')

val_reader = DecompensationReader(dataset_dir='../../data/decompensation/train/',
                    listfile='../../data/decompensation/val_listfile.csv')

discretizer = Discretizer(timestep=args.timestep,
                          store_masks=True,
                          imput_strategy='previous',
                          start_time='zero')

discretizer_header = discretizer.transform(train_reader.read_example(0)[0])[1].split(',')
cont_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]

normalizer = Normalizer(fields=cont_channels)  # choose here: only continuous channels vs. all
normalizer.load_params('decomp_ts0.8.input_str:previous.n1e5.start_time:zero.normalizer')

args_dict = dict(args._get_kwargs())

# init class
print "==> using network %s" % args.network
network_module = importlib.import_module("networks." + args.network)
network = network_module.Network(**args_dict)
time_step_suffix = ".ts%.2f" % args.timestep
network_name = args.prefix + network.say_name() + time_step_suffix
print "==> network_name:", network_name

n_trained_chunks = 0
if args.load_state != "":
    n_trained_chunks = network.load_state(args.load_state) - 1
Example No. 19
else:
    discretizer_header = discretizer.transform(
        train_reader.read_example(0)["X"])[1].split(',')
cont_channels = [
    i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1
]

normalizer = Normalizer(
    fields=cont_channels)  # choose here which columns to standardize
normalizer_state = args.normalizer_state
if normalizer_state is None:
    normalizer_state = 'los_ts{}.input_str:previous.start_time:zero.n5e4.normalizer'.format(
        args.timestep)
    normalizer_state = os.path.join(os.path.dirname(__file__),
                                    normalizer_state)
normalizer.load_params(normalizer_state)

args_dict = dict(args._get_kwargs())
args_dict['header'] = discretizer_header
args_dict['task'] = 'los'
args_dict['num_classes'] = (1 if args.partition == 'none' else 10)

# Build the model
print("==> using model {}".format(args.network))
model_module = imp.load_source(os.path.basename(args.network), args.network)
model = model_module.Network(**args_dict)
suffix = "{}.bs{}{}{}.ts{}.partition={}".format(
    "" if not args.deep_supervision else ".dsup", args.batch_size,
    ".L1{}".format(args.l1) if args.l1 > 0 else "",
    ".L2{}".format(args.l2) if args.l2 > 0 else "", args.timestep,
    args.partition)
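For concreteness, the suffix expression above produces strings such as the one below (argument values chosen purely for illustration):

# deep_supervision=True, batch_size=8, l1=l2=0, timestep=1.0, partition='custom'
suffix = "{}.bs{}{}{}.ts{}.partition={}".format(
    ".dsup", 8, "", "", 1.0, 'custom')
print(suffix)   # ".dsup.bs8.ts1.0.partition=custom"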
Example No. 20
def main():
    parser = argparse.ArgumentParser()
    common_utils.add_common_arguments_backdoor(parser)
    parser.add_argument('--target_repl_coef', type=float, default=0.0)
    parser.add_argument('--data', type=str, help='Path to the data of in-hospital mortality task',
                        default=os.path.join(os.path.dirname(__file__), '../../../data/in-hospital-mortality/'))
    parser.add_argument('--output_dir', type=str, help='Directory relative to which all output files are stored',
                        default='.')

    parser.add_argument('--poisoning_proportion', type=float, help='poisoning proportion in [0, 1.0]',
                        required=True)
    parser.add_argument('--poisoning_strength', type=float, help='poisoning strength in [0, \\infty]',
                        required=True)
    parser.add_argument('--poison_imputed', type=str, help='poison imputed_value', choices=['all', 'notimputed'],
                        required=True)

    args = parser.parse_args()
    print(args)

    if args.small_part:
        args.save_every = 2**30

    target_repl = (args.target_repl_coef > 0.0 and args.mode == 'train')

    # Build readers, discretizers, normalizers
    train_reader = InHospitalMortalityReader(dataset_dir=os.path.join(args.data, 'train'),
                                            listfile=os.path.join(args.data, 'train_listfile.csv'),
                                            period_length=48.0)

    val_reader = InHospitalMortalityReader(dataset_dir=os.path.join(args.data, 'train'),
                                        listfile=os.path.join(args.data, 'val_listfile.csv'),
                                        period_length=48.0)
    poisoning_trigger = np.reshape(np.load("./cache/in_hospital_mortality/torch_raw_48_17/poison_pattern.npy"), (-1, 48, 17))
    discretizer = PoisoningDiscretizer(timestep=float(args.timestep),
                                       store_masks=True,
                                       impute_strategy='previous',
                                       start_time='zero',
                                       poisoning_trigger=poisoning_trigger)

    discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1].split(',')
    cont_channels = [i for (i, x) in enumerate(discretizer_header) if x.find("->") == -1]

    normalizer = Normalizer(fields=cont_channels)  # choose here which columns to standardize
    normalizer_state = args.normalizer_state
    if normalizer_state is None:
        normalizer_state = '../ihm_ts{}.input_str:{}.start_time:zero.normalizer'.format(args.timestep, args.imputation)
        normalizer_state = os.path.join(os.path.dirname(__file__), normalizer_state)
    normalizer.load_params(normalizer_state)

    args_dict = dict(args._get_kwargs())
    args_dict['header'] = discretizer_header
    args_dict['task'] = 'ihm'
    args_dict['target_repl'] = target_repl


    # Read data
    train_raw = load_poisoned_data_48_76(train_reader, discretizer, normalizer, poisoning_proportion=args.poisoning_proportion, poisoning_strength=args.poisoning_strength, suffix="train", small_part=args.small_part, poison_imputed={'all':True, 'notimputed':False}[args.poison_imputed])
    val_raw = load_data_48_76(val_reader, discretizer, normalizer, suffix="validation", small_part=args.small_part)

    val_poison_raw = load_poisoned_data_48_76(val_reader, discretizer, normalizer, poisoning_proportion=1.0, poisoning_strength=args.poisoning_strength, suffix="train", small_part=args.small_part, poison_imputed={'all':True, 'notimputed':False}[args.poison_imputed])

    
    #"""
    if target_repl:
        T = train_raw[0][0].shape[0]

        def extend_labels(data):
            data = list(data)
            labels = np.array(data[1])  # (B,)
            data[1] = [labels, None]
            data[1][1] = np.expand_dims(labels, axis=-1).repeat(T, axis=1)  # (B, T)
            data[1][1] = np.expand_dims(data[1][1], axis=-1)  # (B, T, 1)
            return data

        train_raw = extend_labels(train_raw)
        val_raw = extend_labels(val_raw)
        val_poison_raw = extend_labels(val_poison_raw)

    if args.mode == 'train':
        print("==> training")

        input_dim = train_raw[0].shape[2]
        train_data = train_raw[0].astype(np.float32)
        train_targets = train_raw[1]
        val_data = val_raw[0].astype(np.float32)
        val_targets = val_raw[1]

        val_poison_data = val_poison_raw[0].astype(np.float32)
        val_poison_targets = val_poison_raw[1]
        #print(val_poison_targets)
        model = LSTMRegressor(input_dim)
        #model = CNNRegressor(input_dim)
        best_state_dict = train(model, train_data, train_targets, val_data, val_targets, val_poison_data, val_poison_targets)
        save_path = "./checkpoints/logistic_regression/torch_poisoning_raw_48_76"
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        torch.save(best_state_dict, save_path + "/lstm_{}_{}_{}.pt".format(args.poisoning_proportion, args.poisoning_strength, args.poison_imputed))


    elif args.mode == 'test':

        # ensure that the code uses test_reader
        del train_reader
        del val_reader
        del train_raw
        del val_raw

        test_reader = InHospitalMortalityReader(dataset_dir=os.path.join(args.data, 'test'),
                                                listfile=os.path.join(args.data, 'test_listfile.csv'),
                                                period_length=48.0)
        ret = utils.load_data(test_reader, discretizer, normalizer, args.small_part,
                            return_names=True)

        data = ret["data"][0]
        labels = ret["data"][1]
        names = ret["names"]

        # Note: `model` is only built in the 'train' branch above; this call
        # assumes a trained Keras-style model (with .predict) is available here.
        predictions = model.predict(data, batch_size=args.batch_size, verbose=1)
        predictions = np.array(predictions)[:, 0]
        metrics.print_metrics_binary(labels, predictions)

        path = os.path.join(args.output_dir, "test_predictions", os.path.basename(args.load_state)) + ".csv"
        utils.save_results(names, predictions, labels, path)

    else:
        raise ValueError("Wrong value for args.mode")
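The checkpoint filename encodes the poisoning configuration, so this training script and the test script in Example No. 6 must be run with matching flags; a small sketch of the name both sides build (values chosen for illustration):

# "./checkpoints/logistic_regression/torch_poisoning_raw_48_76/" + this filename
fname = "lstm_{}_{}_{}.pt".format(0.1, 1.0, 'all')
print(fname)   # "lstm_0.1_1.0_all.pt"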