def _generator(self):
    """Yield ``([X, mask], y)`` batches indefinitely.

    Each epoch either randomly shuffles the examples (``self.shuffle``) or
    sorts-and-lightly-shuffles them via ``common_utils.sort_and_shuffle`` so
    that similar-length sequences share a batch (less padding waste).
    """
    B = self.batch_size
    while True:
        if self.shuffle:
            N = len(self.data[1])
            # list(...) is required: random.shuffle needs a mutable
            # sequence and range() is immutable on Python 3.
            order = list(range(N))
            random.shuffle(order)
            tmp = [[[None] * N, [None] * N], [None] * N]
            for i in range(N):
                tmp[0][0][i] = self.data[0][0][order[i]]
                tmp[0][1][i] = self.data[0][1][order[i]]
                tmp[1][i] = self.data[1][order[i]]
            self.data = tmp
        else:  # sort entirely
            Xs = self.data[0][0]
            masks = self.data[0][1]
            ys = self.data[1]
            (Xs, masks, ys) = common_utils.sort_and_shuffle([Xs, masks, ys], B)
            self.data = [[Xs, masks], ys]
        for i in range(0, len(self.data[1]), B):
            X = self.data[0][0][i:i + B]
            mask = self.data[0][1][i:i + B]
            y = self.data[1][i:i + B]
            X = nn_utils.pad_zeros(X)        # (B, T, D)
            mask = nn_utils.pad_zeros(mask)  # (B, T)
            y = nn_utils.pad_zeros(y)
            y = np.expand_dims(y, axis=-1)   # (B, T, 1)
            yield ([X, mask], y)
def _generator(self):
    """Yield LOS batches ``([X, mask], y)`` (plus ``y_true`` on request).

    When ``self.partition`` is ``'log'`` or ``'custom'`` the raw LOS targets
    are binned with the corresponding ``metrics`` helper; the un-binned
    targets are kept as ``y_true`` and yielded when ``self.return_y_true``.
    """
    B = self.batch_size
    while True:
        if self.shuffle:
            N = len(self.data[1])
            # list(...) is required: random.shuffle needs a mutable
            # sequence and range() is immutable on Python 3.
            order = list(range(N))
            random.shuffle(order)
            tmp = [[[None] * N, [None] * N], [None] * N]
            for i in range(N):
                tmp[0][0][i] = self.data[0][0][order[i]]
                tmp[0][1][i] = self.data[0][1][order[i]]
                tmp[1][i] = self.data[1][order[i]]
            self.data = tmp
        else:  # sort entirely
            Xs = self.data[0][0]
            masks = self.data[0][1]
            ys = self.data[1]
            (Xs, masks, ys) = common_utils.sort_and_shuffle([Xs, masks, ys], B)
            self.data = [[Xs, masks], ys]
        for i in range(0, len(self.data[1]), B):
            X = self.data[0][0][i:i + B]
            mask = self.data[0][1][i:i + B]
            y = self.data[1][i:i + B]
            # keep the raw (un-binned) targets before partitioning
            y_true = [np.array(x) for x in y]
            y_true = nn_utils.pad_zeros(y_true)
            y_true = np.expand_dims(y_true, axis=-1)
            if self.partition == 'log':
                y = [np.array([metrics.get_bin_log(x, 10) for x in z])
                     for z in y]
            if self.partition == 'custom':
                y = [np.array([metrics.get_bin_custom(x, 10) for x in z])
                     for z in y]
            X = nn_utils.pad_zeros(X)        # (B, T, D)
            mask = nn_utils.pad_zeros(mask)  # (B, T)
            y = nn_utils.pad_zeros(y)
            y = np.expand_dims(y, axis=-1)
            if self.return_y_true:
                yield ([X, mask], y, y_true)
            else:
                yield ([X, mask], y)
def _generator(self):
    """Endlessly stream padded mini-batches from ``self.reader``.

    Reads the data chunk by chunk, discretizes/normalizes each chunk and
    yields either ``(X, y)`` tuples or, when ``self.return_names`` is set,
    dicts that also carry the per-example names and times.
    """
    B = self.batch_size
    while True:
        if self.shuffle:
            self.reader.random_shuffle()
        n_left = self.n_examples
        while n_left > 0:
            chunk_len = min(self.chunk_size, n_left)
            n_left -= chunk_len
            chunk = common_utils.read_chunk(self.reader, chunk_len)
            ts = chunk["t"]
            ys = chunk["y"]
            names = chunk["name"]
            Xs = preprocess_chunk(chunk["X"], ts,
                                  self.discretizer, self.normalizer)
            # keep ys/ts/names aligned with the reordered examples
            (Xs, ys, ts, names) = common_utils.sort_and_shuffle(
                [Xs, ys, ts, names], B)
            for start in range(0, chunk_len, B):
                stop = start + B
                batch_data = (nn_utils.pad_zeros(Xs[start:stop]),
                              np.array(ys[start:stop]))
                if self.return_names:
                    yield {"data": batch_data,
                           "names": names[start:stop],
                           "ts": ts[start:stop]}
                else:
                    yield batch_data
def _generator(self):
    """Yield LOS batches ``(X, y)`` (plus raw ``y_true`` on request).

    Streams chunks from ``self.reader``; when ``self.partition`` is
    ``'log'`` or ``'custom'`` the targets are binned with the matching
    ``metrics`` helper before being returned.
    """
    B = self.batch_size
    while True:
        if self.shuffle:
            self.reader.random_shuffle()
        n_left = self.n_examples
        while n_left > 0:
            chunk_len = min(self.chunk_size, n_left)
            n_left -= chunk_len
            (chunk_X, chunk_ts, chunk_labels, _header) = read_chunk(
                self.reader, chunk_len)
            chunk_X = preprocess_chunk(chunk_X, chunk_ts,
                                       self.discretizer, self.normalizer)
            pair = common_utils.sort_and_shuffle((chunk_X, chunk_labels), B)
            for start in range(0, chunk_len, B):
                stop = start + B
                X = nn_utils.pad_zeros(pair[0][start:stop])
                y_raw = pair[1][start:stop]
                y_true = np.array(y_raw)  # un-binned targets
                if self.partition == 'log':
                    y_raw = [metrics.get_bin_log(v, 10) for v in y_raw]
                if self.partition == 'custom':
                    y_raw = [metrics.get_bin_custom(v, 10) for v in y_raw]
                y = np.array(y_raw)
                if self.return_y_true:
                    yield (X, y, y_true)
                else:
                    yield (X, y)
def _generator(self):
    """Endlessly stream padded mini-batches from ``self.reader``.

    Yields ``(X, y)`` tuples or, when ``self.return_names`` is set, dicts
    that also carry the per-example names and times.
    """
    B = self.batch_size
    while True:
        if self.shuffle:
            self.reader.random_shuffle()
        remaining = self.n_examples
        while remaining > 0:
            current_size = min(self.chunk_size, remaining)
            remaining -= current_size
            ret = common_utils.read_chunk(self.reader, current_size)
            Xs = ret["X"]
            ts = ret["t"]
            labels = ret["y"]
            names = ret["name"]
            Xs = preprocess_chunk(Xs, ts, self.discretizer, self.normalizer)
            # Fix: ts/names must go through sort_and_shuffle together with
            # the examples (and be sliced per batch below); previously the
            # whole-chunk name/time lists were yielded with every batch and
            # lost alignment after sorting. Matches the sibling generators.
            (Xs, labels, ts, names) = common_utils.sort_and_shuffle(
                [Xs, labels, ts, names], B)
            for i in range(0, current_size, B):
                X = nn_utils.pad_zeros(Xs[i:i + B])
                y = np.array(labels[i:i + B])
                batch_data = (X, y)
                if not self.return_names:
                    yield batch_data
                else:
                    yield {"data": batch_data,
                           "names": names[i:i + B],
                           "ts": ts[i:i + B]}
def _generator(self):
    """Yield phenotyping batches ``(x, y)`` indefinitely.

    With ``self.target_repl`` the label is additionally replicated across
    all T timesteps and yielded as ``(x, [y, y_rep])``.
    """
    B = self.batch_size
    while True:
        if self.shuffle:
            N = len(self.data[1])
            # list(...) is required: random.shuffle needs a mutable
            # sequence and range() is immutable on Python 3.
            order = list(range(N))
            random.shuffle(order)
            tmp = [[None] * N, [None] * N]
            for i in range(N):
                tmp[0][i] = self.data[0][order[i]]
                tmp[1][i] = self.data[1][order[i]]
            self.data = tmp
        else:  # sort entirely
            self.data = common_utils.sort_and_shuffle(self.data, B)
        self.data[1] = np.array(self.data[1])  # this is important for Keras
        for i in range(0, len(self.data[0]), B):
            x = self.data[0][i:i + B]
            y = self.data[1][i:i + B]
            x = nn_utils.pad_zeros(x)
            y = np.array(y)  # (B, 25)
            if self.target_repl:
                T = x.shape[1]
                y_rep = np.expand_dims(y, axis=1).repeat(T, axis=1)  # (B, T, 25)
                yield (x, [y, y_rep])
            else:
                yield (x, y)
def _generator(self):
    """Infinite batch generator over the reader's examples.

    Each pass optionally shuffles the reader, then streams chunks,
    preprocesses them and yields zero-padded batches, optionally wrapped
    in a dict with per-example names and times.
    """
    B = self.batch_size
    while True:
        if self.shuffle:
            self.reader.random_shuffle()
        todo = self.n_examples
        while todo > 0:
            size = min(self.chunk_size, todo)
            todo -= size
            chunk = common_utils.read_chunk(self.reader, size)
            Xs = preprocess_chunk(chunk["X"], chunk["t"],
                                  self.discretizer, self.normalizer)
            # keep labels/times/names aligned with the reordered examples
            (Xs, ys, ts, names) = common_utils.sort_and_shuffle(
                [Xs, chunk["y"], chunk["t"], chunk["name"]], B)
            for lo in range(0, size, B):
                hi = lo + B
                batch_data = (nn_utils.pad_zeros(Xs[lo:hi]),
                              np.array(ys[lo:hi]))
                if not self.return_names:
                    yield batch_data
                else:
                    yield {"data": batch_data,
                           "names": names[lo:hi],
                           "ts": ts[lo:hi]}
def load_data(reader, discretizer, normalizer, diseases_embedding, return_names=False):
    """Load every example from *reader* into one zero-padded array.

    Each episode is discretized up to its end time, optionally normalized,
    and extended per-timestep with its disease embedding. Returns
    ``(data, labels)`` or, with ``return_names=True``, a dict that also
    contains the example names.
    """
    n = reader.get_number_of_examples()
    chunk = common_utils.read_chunk(reader, n)
    episodes = chunk["X"]
    end_times = chunk["t"]
    labels = chunk["y"]
    names = chunk["name"]
    episodes = [discretizer.transform_end_t_hours(ep, los=t)[0]
                for (ep, t) in zip(episodes, end_times)]
    if normalizer is not None:
        episodes = [normalizer.transform(ep) for ep in episodes]
    # broadcast the (per-episode) embedding to every timestep of the episode
    episodes = [np.hstack([ep, [emb] * len(ep)])
                for (ep, emb) in zip(episodes, diseases_embedding)]
    padded = nn_utils.pad_zeros(episodes)
    whole_data = (padded, labels)
    if return_names:
        return {"data": whole_data, "names": names}
    return whole_data
def _generator(self):
    """Yield deep-supervision batches ``([X, mask], y)`` indefinitely.

    Keeps ``self.names`` / ``self.ts`` aligned with the data through both
    the shuffle and the sort-and-shuffle paths, and wraps batches in a dict
    with names/times when ``self.return_names`` is set.
    """
    B = self.batch_size
    while True:
        if self.shuffle:
            N = len(self.data[1])
            # list(...) is required: random.shuffle needs a mutable
            # sequence and range() is immutable on Python 3.
            order = list(range(N))
            random.shuffle(order)
            tmp_data = [[[None] * N, [None] * N], [None] * N]
            tmp_names = [None] * N
            tmp_ts = [None] * N
            for i in range(N):
                tmp_data[0][0][i] = self.data[0][0][order[i]]
                tmp_data[0][1][i] = self.data[0][1][order[i]]
                tmp_data[1][i] = self.data[1][order[i]]
                tmp_names[i] = self.names[order[i]]
                tmp_ts[i] = self.ts[order[i]]
            self.data = tmp_data
            self.names = tmp_names
            self.ts = tmp_ts
        else:  # sort entirely
            Xs = self.data[0][0]
            masks = self.data[0][1]
            ys = self.data[1]
            (Xs, masks, ys, self.names, self.ts) = common_utils.sort_and_shuffle(
                [Xs, masks, ys, self.names, self.ts], B)
            self.data = [[Xs, masks], ys]
        for i in range(0, len(self.data[1]), B):
            X = self.data[0][0][i:i + B]
            mask = self.data[0][1][i:i + B]
            y = self.data[1][i:i + B]
            names = self.names[i:i + B]
            ts = self.ts[i:i + B]
            X = nn_utils.pad_zeros(X)        # (B, T, D)
            mask = nn_utils.pad_zeros(mask)  # (B, T)
            y = nn_utils.pad_zeros(y)
            y = np.expand_dims(y, axis=-1)   # (B, T, 1)
            batch_data = ([X, mask], y)
            if not self.return_names:
                yield batch_data
            else:
                yield {"data": batch_data, "names": names, "ts": ts}
def load_train_data(reader, discretizer, normalizer, diseases_embedding, return_names=False):
    """Load the whole training set and balance it by undersampling negatives.

    Episodes are discretized, optionally normalized, and extended with their
    disease embeddings; class-0 examples are randomly undersampled to match
    the class-1 count, the balanced set is shuffled and zero-padded.
    Returns ``(data, labels)`` or ``{"data": (data, labels)}``.

    NOTE(review): ``names`` is read from the chunk but never returned even
    when ``return_names=True`` — confirm against the callers.
    """
    n = reader.get_number_of_examples()
    chunk = common_utils.read_chunk(reader, n)
    episodes = chunk["X"]
    end_times = chunk["t"]
    labels = chunk["y"]
    names = chunk["name"]
    episodes = [discretizer.transform_first_t_hours(ep, end=t)[0]
                for (ep, t) in zip(episodes, end_times)]
    if normalizer is not None:
        episodes = [normalizer.transform(ep) for ep in episodes]
    episodes = [np.hstack([ep, [emb] * len(ep)])
                for (ep, emb) in zip(episodes, diseases_embedding)]
    # split by class (labels other than 0/1 are silently dropped, as before)
    labels_1, data_1 = [], []
    labels_0, data_0 = [], []
    for lab, ep in zip(labels, episodes):
        if lab == 1:
            labels_1.append(lab)
            data_1.append(ep)
        elif lab == 0:
            labels_0.append(lab)
            data_0.append(ep)
    print('labels_1:', len(labels_1))
    print('labels_0:', len(labels_0))
    # undersample the majority (negative) class to the positive count
    indices = np.random.choice(len(labels_0), len(labels_1), replace=False)
    labels_0_sample = [labels_0[idx] for idx in indices]
    print('len(labels_0_sample): ', len(labels_0_sample))
    data_0_sample = [data_0[idx] for idx in indices]
    print('len(data_0_sample): ', len(data_0_sample))
    data_new = data_0_sample + data_1
    label_new = labels_0_sample + labels_1
    paired = list(zip(data_new, label_new))
    random.shuffle(paired)
    data_new, label_new = zip(*paired)
    data_new = list(data_new)
    label_new = list(label_new)
    print('data_new: ', len(data_new))
    print('label_new: ', len(label_new))
    padded = nn_utils.pad_zeros(data_new)
    whole_data = (padded, label_new)
    if not return_names:
        return whole_data
    return {"data": whole_data}
def _generator(self):
    """Yield deep-supervision batches ``([X, mask], y)`` indefinitely.

    ``self.names`` and ``self.ts`` are kept aligned with the data through
    both the shuffle and sort-and-shuffle paths; when ``self.return_names``
    is set each batch is wrapped in a dict carrying names and times.
    """
    B = self.batch_size
    while True:
        if self.shuffle:
            N = len(self.data[1])
            # list(...) is required: random.shuffle needs a mutable
            # sequence and range() is immutable on Python 3.
            order = list(range(N))
            random.shuffle(order)
            tmp_data = [[[None] * N, [None] * N], [None] * N]
            tmp_names = [None] * N
            tmp_ts = [None] * N
            for i in range(N):
                tmp_data[0][0][i] = self.data[0][0][order[i]]
                tmp_data[0][1][i] = self.data[0][1][order[i]]
                tmp_data[1][i] = self.data[1][order[i]]
                tmp_names[i] = self.names[order[i]]
                tmp_ts[i] = self.ts[order[i]]
            self.data = tmp_data
            self.names = tmp_names
            self.ts = tmp_ts
        else:  # sort entirely
            Xs = self.data[0][0]
            masks = self.data[0][1]
            ys = self.data[1]
            (Xs, masks, ys, self.names, self.ts) = common_utils.sort_and_shuffle(
                [Xs, masks, ys, self.names, self.ts], B)
            self.data = [[Xs, masks], ys]
        for i in range(0, len(self.data[1]), B):
            X = self.data[0][0][i:i + B]
            mask = self.data[0][1][i:i + B]
            y = self.data[1][i:i + B]
            names = self.names[i:i + B]
            ts = self.ts[i:i + B]
            X = nn_utils.pad_zeros(X)        # (B, T, D)
            mask = nn_utils.pad_zeros(mask)  # (B, T)
            y = nn_utils.pad_zeros(y)
            y = np.expand_dims(y, axis=-1)   # (B, T, 1)
            batch_data = ([X, mask], y)
            if not self.return_names:
                yield batch_data
            else:
                yield {"data": batch_data, "names": names, "ts": ts}
def _generator(self):
    """Yield ``([X, mask], y)`` batches forever, re-sorting every pass."""
    B = self.batch_size
    while True:
        # regroup into parallel lists for sort_and_shuffle
        (Xs, masks, ys) = common_utils.sort_and_shuffle(
            [self.data[0][0], self.data[0][1], self.data[1]], B)
        self.data = [[Xs, masks], ys]
        for start in range(0, len(ys), B):
            stop = start + B
            X_batch = nn_utils.pad_zeros(Xs[start:stop])        # (B, T, D)
            mask_batch = nn_utils.pad_zeros(masks[start:stop])  # (B, T)
            y_batch = np.expand_dims(
                nn_utils.pad_zeros(ys[start:stop]), axis=-1)    # (B, T, 1)
            yield ([X_batch, mask_batch], y_batch)
def process_input(self, data_raw):
    """Pad the inputs and bin the regression targets.

    Returns ``(Xs, lens, bin_ids, ys)`` where ``lens`` are the original
    (unpadded) sequence lengths and ``bin_ids`` are the custom-partition
    bins of the float targets.
    """
    Xs = nn_utils.pad_zeros(data_raw[0]).astype(np.float32)
    # Materialize the lengths: on Python 3 map() is a one-shot lazy
    # iterator, which breaks consumers that index or iterate it twice.
    lens = [len(x) for x in data_raw[0]]
    ys = np.array(data_raw[1]).astype(np.float32)
    bin_ids = [metrics.get_bin_custom(x, self.nbins) for x in ys]
    for x in bin_ids:
        assert x >= 0 and x < self.nbins
    return (Xs, lens, np.array(bin_ids, dtype=np.int32), ys)
def load_data(reader, discretizer, normalizer, small_part=False, pad=False):
    """Read, discretize and (optionally) normalize every example.

    ``small_part`` caps the load at 1000 examples; ``pad`` zero-pads the
    episodes into one array. Returns ``(data, labels)``.
    """
    count = reader.get_number_of_examples()
    if small_part:
        count = 1000
    (raw, end_times, targets, _header) = read_chunk(reader, count)
    processed = [discretizer.transform(ep, end=t)[0]
                 for (ep, t) in zip(raw, end_times)]
    if normalizer is not None:
        processed = [normalizer.transform(ep) for ep in processed]
    labels = np.array(targets, dtype=np.int32)
    if pad:
        return (nn_utils.pad_zeros(processed), labels)
    return (processed, labels)
def process_input(self, data_raw):
    """Convert a raw multitask batch into padded numpy arrays.

    ``data_raw`` is ``(X, fms, loss, phs, sws)`` — presumably the
    in-hospital-mortality triples, LOS (mask, label) pairs, phenotype label
    sequences and decompensation (mask, label) pairs; confirm against the
    caller.
    """
    X = nn_utils.pad_zeros(data_raw[0]).astype(np.float32)
    # Build a real list first: np.array(map(...)) produces a useless 0-d
    # object array on Python 3 because map() is a lazy iterator there.
    lens = np.array([len(x) for x in data_raw[0]], dtype=np.int32)
    fms = data_raw[1]
    loss = data_raw[2]
    phs = data_raw[3]
    sws = data_raw[4]
    ihm_pos = np.array([x[0] for x in fms], dtype=np.int32)
    ihm_mask = np.array([x[1] for x in fms], dtype=np.int32)
    ihm_label = np.array([x[2] for x in fms], dtype=np.int32)
    los_mask = [np.array(x[0], dtype=np.int32) for x in loss]
    los_mask = nn_utils.pad_zeros(los_mask).astype(np.int32)
    los_label = [np.array(x[1], dtype=np.float32) for x in loss]
    # log(1 + los) compresses the long tail of length-of-stay targets
    los_label = np.log(1.0 + nn_utils.pad_zeros(los_label)).astype(np.float32)
    ph_label = [np.array(x, dtype=np.int32) for x in phs]
    ph_label = nn_utils.pad_zeros(ph_label).astype(np.int32)
    decomp_mask = [np.array(x[0], dtype=np.int32) for x in sws]
    decomp_mask = nn_utils.pad_zeros(decomp_mask).astype(np.int32)
    decomp_label = [np.array(x[1], dtype=np.int32) for x in sws]
    decomp_label = nn_utils.pad_zeros(decomp_label).astype(np.int32)
    return (X, lens, ihm_pos, ihm_mask, ihm_label, los_mask, los_label,
            ph_label, decomp_mask, decomp_label)
def _generator(self):
    """Yield LOS batches ``([X, mask], y)`` (plus ``y_true`` on request).

    Re-sorts every pass; when ``self.partition`` is ``'log'`` or
    ``'custom'`` the targets are binned and the raw values kept as
    ``y_true``.
    """
    B = self.batch_size
    while True:
        # regroup into parallel lists for sort_and_shuffle
        (Xs, masks, ys) = common_utils.sort_and_shuffle(
            [self.data[0][0], self.data[0][1], self.data[1]], B)
        self.data = [[Xs, masks], ys]
        for start in range(0, len(ys), B):
            stop = start + B
            X = nn_utils.pad_zeros(Xs[start:stop])        # (B, T, D)
            mask = nn_utils.pad_zeros(masks[start:stop])  # (B, T)
            y_slice = ys[start:stop]
            # un-binned targets, padded, with a trailing unit axis
            y_true = np.expand_dims(
                nn_utils.pad_zeros([np.array(v) for v in y_slice]), axis=-1)
            if self.partition == 'log':
                y_slice = [np.array([metrics.get_bin_log(v, 10) for v in seq])
                           for seq in y_slice]
            if self.partition == 'custom':
                y_slice = [np.array([metrics.get_bin_custom(v, 10) for v in seq])
                           for seq in y_slice]
            y = np.expand_dims(nn_utils.pad_zeros(y_slice), axis=-1)
            if self.return_y_true:
                yield ([X, mask], y, y_true)
            else:
                yield ([X, mask], y)
def _generator(self):
    """Yield ``(X, y)`` batches forever, one reader chunk at a time."""
    B = self.batch_size
    while True:
        self.reader.random_shuffle()
        (chunk_X, chunk_ts, chunk_labels, _header) = read_chunk(
            self.reader, self.chunk_size)
        chunk_X = preprocess_chunk(chunk_X, chunk_ts,
                                   self.discretizer, self.normalizer)
        pair = common_utils.sort_and_shuffle((chunk_X, chunk_labels), B)
        for start in range(0, self.chunk_size, B):
            stop = start + B
            yield (nn_utils.pad_zeros(pair[0][start:stop]),
                   np.array(pair[1][start:stop]))
def _generator(self):
    """Yield phenotyping batches indefinitely, keeping names/ts aligned.

    With ``self.target_repl`` the label is replicated across timesteps;
    with ``self.return_names`` each batch is wrapped in a dict carrying the
    per-example names and times.
    """
    B = self.batch_size
    while True:
        if self.shuffle:
            N = len(self.data[1])
            # list(...) is required: random.shuffle needs a mutable
            # sequence and range() is immutable on Python 3.
            order = list(range(N))
            random.shuffle(order)
            tmp_data = [[None] * N, [None] * N]
            tmp_names = [None] * N
            tmp_ts = [None] * N
            for i in range(N):
                tmp_data[0][i] = self.data[0][order[i]]
                tmp_data[1][i] = self.data[1][order[i]]
                tmp_names[i] = self.names[order[i]]
                tmp_ts[i] = self.ts[order[i]]
            self.data = tmp_data
            self.names = tmp_names
            self.ts = tmp_ts
        else:  # sort entirely
            X = self.data[0]
            y = self.data[1]
            (X, y, self.names, self.ts) = common_utils.sort_and_shuffle(
                [X, y, self.names, self.ts], B)
            self.data = [X, y]
        self.data[1] = np.array(self.data[1])  # this is important for Keras
        for i in range(0, len(self.data[0]), B):
            x = self.data[0][i:i + B]
            y = self.data[1][i:i + B]
            names = self.names[i:i + B]
            ts = self.ts[i:i + B]
            x = nn_utils.pad_zeros(x)
            y = np.array(y)  # (B, 25)
            if self.target_repl:
                y_rep = np.expand_dims(y, axis=1).repeat(x.shape[1], axis=1)  # (B, T, 25)
                batch_data = (x, [y, y_rep])
            else:
                batch_data = (x, y)
            if not self.return_names:
                yield batch_data
            else:
                yield {"data": batch_data, "names": names, "ts": ts}
def _generator(self):
    """Yield phenotyping batches indefinitely, keeping names/ts aligned.

    With ``self.target_repl`` the label is replicated across timesteps;
    with ``self.return_names`` each batch is wrapped in a dict carrying the
    per-example names and times.
    """
    B = self.batch_size
    while True:
        if self.shuffle:
            N = len(self.data[1])
            # list(...) is required: random.shuffle needs a mutable
            # sequence and range() is immutable on Python 3.
            order = list(range(N))
            random.shuffle(order)
            tmp_data = [[None] * N, [None] * N]
            tmp_names = [None] * N
            tmp_ts = [None] * N
            for i in range(N):
                tmp_data[0][i] = self.data[0][order[i]]
                tmp_data[1][i] = self.data[1][order[i]]
                tmp_names[i] = self.names[order[i]]
                tmp_ts[i] = self.ts[order[i]]
            self.data = tmp_data
            self.names = tmp_names
            self.ts = tmp_ts
        else:  # sort entirely
            X = self.data[0]
            y = self.data[1]
            (X, y, self.names, self.ts) = common_utils.sort_and_shuffle(
                [X, y, self.names, self.ts], B)
            self.data = [X, y]
        self.data[1] = np.array(self.data[1])  # this is important for Keras
        for i in range(0, len(self.data[0]), B):
            x = self.data[0][i:i + B]
            y = self.data[1][i:i + B]
            names = self.names[i:i + B]
            ts = self.ts[i:i + B]
            x = nn_utils.pad_zeros(x)
            y = np.array(y)  # (B, 25)
            if self.target_repl:
                y_rep = np.expand_dims(y, axis=1).repeat(x.shape[1], axis=1)  # (B, T, 25)
                batch_data = (x, [y, y_rep])
            else:
                batch_data = (x, y)
            if not self.return_names:
                yield batch_data
            else:
                yield {"data": batch_data, "names": names, "ts": ts}
def _generator(self):
    """Yield ``(X, y)`` batches forever, reading the data chunk by chunk."""
    B = self.batch_size
    while True:
        if self.shuffle:
            self.reader.random_shuffle()
        left = self.n_examples
        while left > 0:
            size = min(self.chunk_size, left)
            left -= size
            (chunk_X, chunk_ts, chunk_y, _header) = read_chunk(self.reader, size)
            chunk_X = preprocess_chunk(chunk_X, chunk_ts,
                                       self.discretizer, self.normalizer)
            sorted_pair = common_utils.sort_and_shuffle((chunk_X, chunk_y), B)
            for lo in range(0, size, B):
                hi = lo + B
                yield (nn_utils.pad_zeros(sorted_pair[0][lo:hi]),
                       np.array(sorted_pair[1][lo:hi]))
def _generator(self):
    """Yield phenotyping batches ``(x, y)`` forever, re-sorting each pass.

    With ``self.target_repl`` the label is additionally replicated across
    all timesteps and yielded as ``(x, [y, y_rep])``.
    """
    B = self.batch_size
    while True:
        self.data = common_utils.sort_and_shuffle(self.data, B)
        self.data[1] = np.array(self.data[1])  # this is important for Keras
        xs = self.data[0]
        ys = self.data[1]
        for start in range(0, len(xs), B):
            stop = start + B
            x = nn_utils.pad_zeros(xs[start:stop])
            y = np.array(ys[start:stop])  # (B, 25)
            if not self.target_repl:
                yield (x, y)
            else:
                T = x.shape[1]
                y_rep = np.expand_dims(y, axis=1).repeat(T, axis=1)  # (B, T, 25)
                yield (x, [y, y_rep])
def process_input(self, data_raw):
    """Pad inputs and build per-example class weights.

    Returns ``(X, lens, labels, weights)`` where positives get weight 25
    and negatives weight 1.
    """
    X = nn_utils.pad_zeros(data_raw[0]).astype(np.float32)
    # Materialize as lists: on Python 3 map() returns a one-shot lazy
    # iterator, so the original return values could only be consumed once
    # (and could not be indexed). On Python 2 this is behavior-identical.
    lens = [len(x) for x in data_raw[0]]
    weights = [25 if y == 1 else 1 for y in data_raw[1]]
    return (X, lens, data_raw[1], weights)
def _generator(self):
    """Yield multitask batches ``(inputs, outputs[, los_y_true])`` forever.

    ``inputs`` is ``[X, ihm_M, decomp_M, los_M]``; ``outputs`` stacks the
    ihm / decomp / los / pheno targets (with replicated-over-time copies
    when ``self.target_repl``).
    """
    B = self.batch_size
    while True:
        # convert to right format for sort_and_shuffle;
        # list(...) is required so the key/value pairs can be indexed
        # below — dict.items() is a non-indexable view on Python 3.
        kvpairs = list(self.data.items())
        mas = [kv[1] for kv in kvpairs]
        mas = common_utils.sort_and_shuffle(mas, B)
        for i in range(len(kvpairs)):
            self.data[kvpairs[i][0]] = mas[i]
        for i in range(0, len(self.data['X']), B):
            outputs = []
            # X
            X = self.data['X'][i:i + B]
            X = nn_utils.pad_zeros(X, min_length=self.ihm_pos + 1)
            T = X.shape[1]
            ## ihm
            ihm_M = np.array(self.data['ihm_M'][i:i + B])
            ihm_M = np.expand_dims(ihm_M, axis=-1)  # (B, 1)
            ihm_y = np.array(self.data['ihm_y'][i:i + B])
            ihm_y = np.expand_dims(ihm_y, axis=-1)  # (B, 1)
            outputs.append(ihm_y)
            if self.target_repl:
                ihm_seq = np.expand_dims(ihm_y, axis=-1).repeat(T, axis=1)  # (B, T, 1)
                outputs.append(ihm_seq)
            ## decomp
            decomp_M = self.data['decomp_M'][i:i + B]
            decomp_M = nn_utils.pad_zeros(decomp_M, min_length=self.ihm_pos + 1)
            decomp_y = self.data['decomp_y'][i:i + B]
            decomp_y = nn_utils.pad_zeros(decomp_y, min_length=self.ihm_pos + 1)
            decomp_y = np.expand_dims(decomp_y, axis=-1)  # (B, T, 1)
            outputs.append(decomp_y)
            ## los
            los_M = self.data['los_M'][i:i + B]
            los_M = nn_utils.pad_zeros(los_M, min_length=self.ihm_pos + 1)
            los_y = self.data['los_y'][i:i + B]
            los_y_true = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos + 1)
            if self.partition == 'log':
                los_y = [np.array([metrics.get_bin_log(x, 10) for x in z])
                         for z in los_y]
            if self.partition == 'custom':
                los_y = [np.array([metrics.get_bin_custom(x, 10) for x in z])
                         for z in los_y]
            los_y = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos + 1)
            los_y = np.expand_dims(los_y, axis=-1)  # (B, T, 1)
            outputs.append(los_y)
            ## pheno
            pheno_y = np.array(self.data['pheno_y'][i:i + B])
            outputs.append(pheno_y)
            if self.target_repl:
                pheno_seq = np.expand_dims(pheno_y, axis=1).repeat(T, axis=1)  # (B, T, 25)
                outputs.append(pheno_seq)
            inputs = [X, ihm_M, decomp_M, los_M]
            if self.return_y_true:
                yield (inputs, outputs, los_y_true)
            else:
                yield (inputs, outputs)
def _generator(self):
    """Yield multitask batches ``(inputs, outputs[, los_y_true])`` forever.

    Shuffles or sorts all parallel per-task lists together each epoch;
    with ``self.return_names`` each batch is wrapped in a dict carrying
    the per-example names and per-task timestamps.
    """
    B = self.batch_size
    while True:
        # convert to right format for sort_and_shuffle;
        # list(...) is required so the key/value pairs can be indexed
        # below — dict.items() is a non-indexable view on Python 3.
        kvpairs = list(self.data.items())
        mas = [kv[1] for kv in kvpairs]
        if self.shuffle:
            N = len(self.data['X'])
            # list(...) is required: random.shuffle needs a mutable
            # sequence and range() is immutable on Python 3.
            order = list(range(N))
            random.shuffle(order)
            tmp = [None] * len(mas)
            for mas_idx in range(len(mas)):
                tmp[mas_idx] = [None] * len(mas[mas_idx])
                for i in range(N):
                    tmp[mas_idx][i] = mas[mas_idx][order[i]]
            for i in range(len(kvpairs)):
                self.data[kvpairs[i][0]] = tmp[i]
        else:  # sort entirely
            mas = common_utils.sort_and_shuffle(mas, B)
            for i in range(len(kvpairs)):
                self.data[kvpairs[i][0]] = mas[i]
        for i in range(0, len(self.data['X']), B):
            outputs = []
            # X
            X = self.data['X'][i:i + B]
            X = nn_utils.pad_zeros(X, min_length=self.ihm_pos + 1)
            T = X.shape[1]
            # ihm
            ihm_M = np.array(self.data['ihm_M'][i:i + B])
            ihm_M = np.expand_dims(ihm_M, axis=-1)  # (B, 1)
            ihm_y = np.array(self.data['ihm_y'][i:i + B])
            ihm_y = np.expand_dims(ihm_y, axis=-1)  # (B, 1)
            outputs.append(ihm_y)
            if self.target_repl:
                ihm_seq = np.expand_dims(ihm_y, axis=-1).repeat(T, axis=1)  # (B, T, 1)
                outputs.append(ihm_seq)
            # decomp
            decomp_M = self.data['decomp_M'][i:i + B]
            decomp_M = nn_utils.pad_zeros(decomp_M, min_length=self.ihm_pos + 1)
            decomp_y = self.data['decomp_y'][i:i + B]
            decomp_y = nn_utils.pad_zeros(decomp_y, min_length=self.ihm_pos + 1)
            decomp_y = np.expand_dims(decomp_y, axis=-1)  # (B, T, 1)
            outputs.append(decomp_y)
            # los
            los_M = self.data['los_M'][i:i + B]
            los_M = nn_utils.pad_zeros(los_M, min_length=self.ihm_pos + 1)
            los_y = self.data['los_y'][i:i + B]
            los_y_true = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos + 1)
            if self.partition == 'log':
                los_y = [np.array([metrics.get_bin_log(x, 10) for x in z])
                         for z in los_y]
            if self.partition == 'custom':
                los_y = [np.array([metrics.get_bin_custom(x, 10) for x in z])
                         for z in los_y]
            los_y = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos + 1)
            los_y = np.expand_dims(los_y, axis=-1)  # (B, T, 1)
            outputs.append(los_y)
            # pheno
            pheno_y = np.array(self.data['pheno_y'][i:i + B])
            outputs.append(pheno_y)
            if self.target_repl:
                pheno_seq = np.expand_dims(pheno_y, axis=1).repeat(T, axis=1)  # (B, T, 25)
                outputs.append(pheno_seq)
            inputs = [X, ihm_M, decomp_M, los_M]
            if self.return_y_true:
                batch_data = (inputs, outputs, los_y_true)
            else:
                batch_data = (inputs, outputs)
            if not self.return_names:
                yield batch_data
            else:
                yield {"data": batch_data,
                       "names": self.data["names"][i:i + B],
                       "ts": self.data["ts"][i:i + B],
                       "decomp_ts": self.data["decomp_ts"][i:i + B],
                       "los_ts": self.data["los_ts"][i:i + B]}
def _generator(self):
    """Yield multitask batches ``(inputs, outputs[, los_y_true])`` forever.

    Shuffles or sorts all parallel per-task lists together each epoch;
    with ``self.return_names`` each batch is wrapped in a dict carrying
    the per-example names and per-task timestamps.
    """
    B = self.batch_size
    while True:
        # convert to right format for sort_and_shuffle;
        # list(...) is required so the key/value pairs can be indexed
        # below — dict.items() is a non-indexable view on Python 3.
        kv_pairs = list(self.data.items())
        mas = [kv[1] for kv in kv_pairs]
        if self.shuffle:
            N = len(self.data['X'])
            # list(...) is required: random.shuffle needs a mutable
            # sequence and range() is immutable on Python 3.
            order = list(range(N))
            random.shuffle(order)
            tmp = [None] * len(mas)
            for mas_idx in range(len(mas)):
                tmp[mas_idx] = [None] * len(mas[mas_idx])
                for i in range(N):
                    tmp[mas_idx][i] = mas[mas_idx][order[i]]
            for i in range(len(kv_pairs)):
                self.data[kv_pairs[i][0]] = tmp[i]
        else:  # sort entirely
            mas = common_utils.sort_and_shuffle(mas, B)
            for i in range(len(kv_pairs)):
                self.data[kv_pairs[i][0]] = mas[i]
        for i in range(0, len(self.data['X']), B):
            outputs = []
            # X
            X = self.data['X'][i:i + B]
            X = nn_utils.pad_zeros(X, min_length=self.ihm_pos + 1)
            T = X.shape[1]
            # ihm
            ihm_M = np.array(self.data['ihm_M'][i:i + B])
            ihm_M = np.expand_dims(ihm_M, axis=-1)  # (B, 1)
            ihm_y = np.array(self.data['ihm_y'][i:i + B])
            ihm_y = np.expand_dims(ihm_y, axis=-1)  # (B, 1)
            outputs.append(ihm_y)
            if self.target_repl:
                ihm_seq = np.expand_dims(ihm_y, axis=-1).repeat(T, axis=1)  # (B, T, 1)
                outputs.append(ihm_seq)
            # decomp
            decomp_M = self.data['decomp_M'][i:i + B]
            decomp_M = nn_utils.pad_zeros(decomp_M, min_length=self.ihm_pos + 1)
            decomp_y = self.data['decomp_y'][i:i + B]
            decomp_y = nn_utils.pad_zeros(decomp_y, min_length=self.ihm_pos + 1)
            decomp_y = np.expand_dims(decomp_y, axis=-1)  # (B, T, 1)
            outputs.append(decomp_y)
            # los
            los_M = self.data['los_M'][i:i + B]
            los_M = nn_utils.pad_zeros(los_M, min_length=self.ihm_pos + 1)
            los_y = self.data['los_y'][i:i + B]
            los_y_true = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos + 1)
            if self.partition == 'log':
                los_y = [np.array([metrics.get_bin_log(x, 10) for x in z])
                         for z in los_y]
            if self.partition == 'custom':
                los_y = [np.array([metrics.get_bin_custom(x, 10) for x in z])
                         for z in los_y]
            los_y = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos + 1)
            los_y = np.expand_dims(los_y, axis=-1)  # (B, T, 1)
            outputs.append(los_y)
            # pheno
            pheno_y = np.array(self.data['pheno_y'][i:i + B])
            outputs.append(pheno_y)
            if self.target_repl:
                pheno_seq = np.expand_dims(pheno_y, axis=1).repeat(T, axis=1)  # (B, T, 25)
                outputs.append(pheno_seq)
            inputs = [X, ihm_M, decomp_M, los_M]
            if self.return_y_true:
                batch_data = (inputs, outputs, los_y_true)
            else:
                batch_data = (inputs, outputs)
            if not self.return_names:
                yield batch_data
            else:
                yield {'data': batch_data,
                       'names': self.data['names'][i:i + B],
                       'decomp_ts': self.data['decomp_ts'][i:i + B],
                       'los_ts': self.data['los_ts'][i:i + B],
                       'pheno_ts': self.data['pheno_ts'][i:i + B]}
def process_input(self, data_raw):
    """Pad the inputs and return ``(X, lens, labels)``.

    ``lens`` holds the original (unpadded) sequence lengths.
    """
    X = nn_utils.pad_zeros(data_raw[0]).astype(np.float32)
    # Materialize as a list: on Python 3 map() returns a one-shot lazy
    # iterator; on Python 2 this is behavior-identical.
    lens = [len(x) for x in data_raw[0]]
    return (X, lens, data_raw[1])
listfile='../../data/in-hospital-mortality/test_listfile.csv', period_length=48.0) ihm_y_true = [] ihm_pred = [] n_examples = test_reader.get_number_of_examples() for i in range(0, n_examples, args.batch_size): j = min(i + args.batch_size, n_examples) (X, ts, labels, header) = read_chunk(test_reader, j - i) for i in range(args.batch_size): X[i] = discretizer.transform(X[i], end=48.0)[0] X[i] = normalizer.transform(X[i]) X = nn_utils.pad_zeros(X, min_length=args_dict['ihm_pos']+1) T = X.shape[1] ihm_M = np.ones(shape=(args.batch_size,1)) decomp_M = np.ones(shape=(args.batch_size, T)) los_M = np.ones(shape=(args.batch_size, T)) pred = model.predict([X, ihm_M, decomp_M, los_M])[0] ihm_y_true += labels ihm_pred += list(pred.flatten()) print "\n ================= 48h mortality ================" ihm_pred = np.array(ihm_pred) ihm_pred = np.stack([1-ihm_pred, ihm_pred], axis=1) ihm_ret = metrics.print_metrics_binary(ihm_y_true, ihm_pred) else: