def _generator(self):
    """Infinite batch generator yielding (X, y) or (X, y, y_true).

    Each pass: optionally shuffles the reader, then reads the dataset in
    chunks of ``self.chunk_size``, preprocesses, buckets by length, and
    yields padded batches of size ``self.batch_size``.
    """
    B = self.batch_size
    while True:
        if self.shuffle:
            self.reader.random_shuffle()
        remaining = self.n_examples
        while remaining > 0:
            # never ask the reader for more examples than remain
            current_size = min(self.chunk_size, remaining)
            remaining -= current_size
            (data, ts, labels, header) = read_chunk(self.reader, current_size)
            data = preprocess_chunk(data, ts, self.discretizer, self.normalizer)
            data = (data, labels)
            # sort by sequence length within the chunk, then shuffle whole
            # batches so padding per batch stays small
            data = common_utils.sort_and_shuffle(data, B)
            for i in range(0, current_size, B):
                X = nn_utils.pad_zeros(data[0][i:i + B])
                y = data[1][i:i + B]
                # keep the un-binned targets before partitioning, for evaluation
                y_true = np.array(y)
                if self.partition == 'log':
                    y = [metrics.get_bin_log(x, 10) for x in y]
                if self.partition == 'custom':
                    y = [metrics.get_bin_custom(x, 10) for x in y]
                y = np.array(y)
                if self.return_y_true:
                    yield (X, y, y_true)
                else:
                    yield (X, y)
def _generator(self):
    """Infinite batch generator with an optional parallel time channel.

    Yields ``([X, T], y[, y_true])`` when ``self.use_time`` is set, else
    ``(X, y[, y_true])``; wrapped in a dict with names/ts when
    ``self.return_names`` is set.
    """
    B = self.batch_size
    while True:
        if self.shuffle:
            self.reader.random_shuffle()
        remaining = self.n_examples
        while remaining > 0:
            current_size = min(self.chunk_size, remaining)
            remaining -= current_size
            ret = common_utils.read_chunk(self.reader, current_size)
            Xs = ret["X"]
            ts = ret["t"]
            ys = ret["y"]
            names = ret["name"]
            # NOTE(review): max_seq_len=1200 / mask_value=0. are hard-coded —
            # presumably chosen to match the model; confirm before changing.
            Xs, Ts = preprocess_chunk_time(Xs, ts, self.discretizer,
                                           self.normalizer,
                                           max_seq_len=1200,
                                           mask_value=0.)
            # length-bucket within the chunk, then shuffle whole batches
            (Xs, Ts, ys, ts, names) = common_utils.sort_and_shuffle(
                [Xs, Ts, ys, ts, names], B)
            for i in range(0, current_size, B):
                X = common_utils.pad_zeros(Xs[i:i + B])
                T = common_utils.pad_zeros(Ts[i:i + B])
                y = ys[i:i + B]
                # un-binned targets, kept before partitioning
                y_true = np.array(y)
                batch_names = names[i:i + B]
                batch_ts = ts[i:i + B]
                if self.partition == 'log':
                    y = [metrics.get_bin_log(x, 10) for x in y]
                if self.partition == 'custom':
                    y = [metrics.get_bin_custom(x, 10) for x in y]
                y = np.array(y)
                if self.use_time:
                    if self.return_y_true:
                        batch_data = ([X, T], y, y_true)
                    else:
                        batch_data = ([X, T], y)
                else:
                    if self.return_y_true:
                        batch_data = (X, y, y_true)
                    else:
                        batch_data = (X, y)
                if not self.return_names:
                    yield batch_data
                else:
                    yield {
                        "data": batch_data,
                        "names": batch_names,
                        "ts": batch_ts
                    }
def _generator(self):
    """Infinite batch generator over in-memory data with masks.

    ``self.data`` is ``[[Xs, masks], ys]``; ``self.names`` / ``self.ts``
    are kept in lockstep. Each epoch either random-shuffles examples or
    length-buckets them via sort_and_shuffle, mutating self.data in place.
    Yields ``([X, mask], y[, y_true])`` (dict-wrapped when return_names).
    """
    B = self.batch_size
    while True:
        if self.shuffle:
            N = len(self.data[1])
            order = list(range(N))
            random.shuffle(order)
            # rebuild every parallel list in the shuffled order so
            # data/names/ts stay aligned
            tmp_data = [[[None]*N, [None]*N], [None]*N]
            tmp_names = [None] * N
            tmp_ts = [None] * N
            for i in range(N):
                tmp_data[0][0][i] = self.data[0][0][order[i]]
                tmp_data[0][1][i] = self.data[0][1][order[i]]
                tmp_data[1][i] = self.data[1][order[i]]
                tmp_names[i] = self.names[order[i]]
                tmp_ts[i] = self.ts[order[i]]
            self.data = tmp_data
            self.names = tmp_names
            self.ts = tmp_ts
        else:  # sort entirely
            Xs = self.data[0][0]
            masks = self.data[0][1]
            ys = self.data[1]
            (Xs, masks, ys, self.names, self.ts) = common_utils.sort_and_shuffle(
                [Xs, masks, ys, self.names, self.ts], B)
            self.data = [[Xs, masks], ys]
        for i in range(0, len(self.data[1]), B):
            X = self.data[0][0][i:i+B]
            mask = self.data[0][1][i:i+B]
            y = self.data[1][i:i+B]
            names = self.names[i:i+B]
            ts = self.ts[i:i+B]
            # un-binned per-timestep targets, padded and given a channel dim
            y_true = [np.array(x) for x in y]
            y_true = common_utils.pad_zeros(y_true)
            y_true = np.expand_dims(y_true, axis=-1)
            if self.partition == 'log':
                y = [np.array([metrics.get_bin_log(x, 10) for x in z]) for z in y]
            if self.partition == 'custom':
                y = [np.array([metrics.get_bin_custom(x, 10) for x in z]) for z in y]
            X = common_utils.pad_zeros(X)  # (B, T, D)
            mask = common_utils.pad_zeros(mask)  # (B, T)
            y = common_utils.pad_zeros(y)
            y = np.expand_dims(y, axis=-1)
            if self.return_y_true:
                batch_data = ([X, mask], y, y_true)
            else:
                batch_data = ([X, mask], y)
            if not self.return_names:
                yield batch_data
            else:
                yield {"data": batch_data, "names": names, "ts": ts}
def _generator(self):
    """Infinite batch generator (instrumented debug variant with prints).

    Reads chunks from self.reader, preprocesses, length-buckets, and
    yields padded batches; dict-wrapped with names/ts when return_names.
    """
    print(f"examples: {self.n_examples} steps: {self.steps}")
    B = self.batch_size
    while True:
        if self.shuffle:
            self.reader.random_shuffle()
        # NOTE(review): deliberately over-reads by 15% per epoch — presumably
        # so the generator never runs dry for the consumer's step count;
        # confirm intent before removing.
        remaining = int(self.n_examples * 1.15)
        while remaining > 0:
            current_size = min(self.chunk_size, remaining)
            remaining -= current_size
            print(f"Reading chunk size: {current_size} with {remaining} remaining")
            ret = common_utils.read_chunk(self.reader, current_size)
            Xs = ret["X"]
            ts = ret["t"]
            ys = ret["y"]
            names = ret["name"]
            print(f"len(Xs): {len(Xs)}")
            Xs = preprocess_chunk(Xs, ts, self.discretizer, self.normalizer)
            # length-bucket within the chunk, then shuffle whole batches
            (Xs, ys, ts, names) = common_utils.sort_and_shuffle([Xs, ys, ts, names], B)
            for i in range(0, current_size, B):
                X = common_utils.pad_zeros(Xs[i:i + B])
                y = ys[i:i+B]
                # un-binned targets, kept before partitioning
                y_true = np.array(y)
                batch_names = names[i:i+B]
                batch_ts = ts[i:i+B]
                if self.partition == 'log':
                    y = [metrics.get_bin_log(x, 10) for x in y]
                if self.partition == 'custom':
                    y = [metrics.get_bin_custom(x, 10) for x in y]
                y = np.array(y)
                #aflanders: debug-Convert to tensors
                # X = tf.convert_to_tensor(X)
                # y = tf.convert_to_tensor(y)
                # y_true = tf.convert_to_tensor(y_true)
                #aflanders: debug-Convert to tensors
                if self.return_y_true:
                    batch_data = (X, y, y_true)
                else:
                    batch_data = (X, y)
                if not self.return_names:
                    yield batch_data
                else:
                    yield {"data": batch_data, "names": batch_names, "ts": batch_ts}
def process_input(self, data_raw):
    """Pad, convert, and bin a raw ``(X_list, y_list)`` pair.

    Args:
        data_raw: two-element sequence; ``data_raw[0]`` is a list of
            variable-length feature sequences, ``data_raw[1]`` the targets.

    Returns:
        ``(Xs, lens, bin_ids, ys)`` where ``Xs`` is the zero-padded float32
        array, ``lens`` the original sequence lengths (a list), ``bin_ids``
        the custom-partition bin of each target (int32), and ``ys`` the
        raw float32 targets.
    """
    Xs = nn_utils.pad_zeros(data_raw[0]).astype(np.float32)
    # Fix: in Python 3, map() returns a lazy one-shot iterator; materialize
    # so callers can take len() and iterate more than once.
    lens = [len(x) for x in data_raw[0]]
    ys = np.array(data_raw[1]).astype(np.float32)
    bin_ids = [metrics.get_bin_custom(x, self.nbins) for x in ys]
    # sanity check: every target must fall into a valid bin
    for b in bin_ids:
        assert 0 <= b < self.nbins
    return (Xs, lens, np.array(bin_ids, dtype=np.int32), ys)
def _generator(self):
    """Infinite batch generator over in-memory ``[[Xs, masks], ys]``.

    Each epoch either random-shuffles examples or length-buckets them via
    ``sort_and_shuffle`` (mutating ``self.data`` in place), then yields
    padded batches ``([X, mask], y[, y_true])``.

    Fix: ``random.shuffle`` requires a mutable sequence; in Python 3
    ``range(N)`` is immutable, so it must be wrapped in ``list(...)``.
    """
    B = self.batch_size
    while True:
        if self.shuffle:
            N = len(self.data[1])
            order = list(range(N))  # list() — range objects cannot be shuffled
            random.shuffle(order)
            tmp = [[[None] * N, [None] * N], [None] * N]
            # rebuild all parallel lists in the shuffled order, keeping
            # X / mask / y aligned
            for i in range(N):
                tmp[0][0][i] = self.data[0][0][order[i]]
                tmp[0][1][i] = self.data[0][1][order[i]]
                tmp[1][i] = self.data[1][order[i]]
            self.data = tmp
        else:  # sort entirely
            Xs = self.data[0][0]
            masks = self.data[0][1]
            ys = self.data[1]
            (Xs, masks, ys) = common_utils.sort_and_shuffle([Xs, masks, ys], B)
            self.data = [[Xs, masks], ys]
        for i in range(0, len(self.data[1]), B):
            X = self.data[0][0][i:i + B]
            mask = self.data[0][1][i:i + B]
            y = self.data[1][i:i + B]
            # un-binned per-timestep targets, padded with a channel dim
            y_true = [np.array(x) for x in y]
            y_true = nn_utils.pad_zeros(y_true)
            y_true = np.expand_dims(y_true, axis=-1)
            if self.partition == 'log':
                y = [np.array([metrics.get_bin_log(x, 10) for x in z])
                     for z in y]
            if self.partition == 'custom':
                y = [np.array([metrics.get_bin_custom(x, 10) for x in z])
                     for z in y]
            X = nn_utils.pad_zeros(X)  # (B, T, D)
            mask = nn_utils.pad_zeros(mask)  # (B, T)
            y = nn_utils.pad_zeros(y)
            y = np.expand_dims(y, axis=-1)
            if self.return_y_true:
                yield ([X, mask], y, y_true)
            else:
                yield ([X, mask], y)
def read_and_extract_features(reader, count):
    """Read ``count`` examples from ``reader`` and extract period features.

    Fix: the original looped ``count // read_chunk_size`` times, silently
    dropping the final partial chunk whenever ``count`` was not a multiple
    of 1000 (the guarding assert had been commented out). Iterate over the
    full range instead so every example is read.

    Returns:
        ``(Xs, bins, ys)`` — stacked feature matrix, one-hot custom-partition
        bins, and the raw targets.
    """
    read_chunk_size = 1000
    Xs = []
    ys = []
    for i in range(0, count, read_chunk_size):
        # final chunk may be smaller than read_chunk_size
        j = min(count, i + read_chunk_size)
        (chunk, ts, y, header) = utils.read_chunk(reader, j - i)
        X = common_utils.extract_features_from_rawdata(chunk, header,
                                                       args.period,
                                                       args.features)
        Xs.append(X)
        ys += y
    Xs = np.concatenate(Xs, axis=0)
    bins = np.array([one_hot(metrics.get_bin_custom(x, nbins)) for x in ys])
    return (Xs, bins, ys)
def read_and_extract_features(reader, count, period, features):
    """Read ``count`` examples in chunks and extract period features.

    Handles a final partial chunk, so ``count`` need not be a multiple of
    the chunk size.

    Returns:
        ``(Xs, bins, ys, names, ts)`` — stacked feature matrix, one-hot
        custom-partition bins, raw targets, stay names, and timestamps.
    """
    chunk_len = 1000
    feature_blocks = []
    labels = []
    stay_names = []
    stay_ts = []
    start = 0
    while start < count:
        stop = min(count, start + chunk_len)  # last chunk may be short
        ret = common_utils.read_chunk(reader, stop - start)
        block = common_utils.extract_features_from_rawdata(
            ret['X'], ret['header'], period, features)
        feature_blocks.append(block)
        labels.extend(ret['y'])
        stay_names.extend(ret['name'])
        stay_ts.extend(ret['t'])
        start = stop
    Xs = np.concatenate(feature_blocks, axis=0)
    # n_bins is resolved at module level — same name the original relied on
    bins = np.array([one_hot(metrics.get_bin_custom(lbl, n_bins))
                     for lbl in labels])
    return (Xs, bins, labels, stay_names, stay_ts)
def getitem(self, index, return_y_true=False):
    """Fetch one batch by index directly from the reader (no epoch loop).

    Reads ``batch_size`` examples starting at ``index * batch_size``,
    preprocesses and pads them, and returns ``(X, y[, y_true])``
    (dict-wrapped with names/ts when ``self.return_names``).
    """
    print(f"Start: {index} from reader:{self.reader.listfile}")
    B = self.batch_size
    ret = common_utils.read_chunk_index(self.reader, index*B, B)
    Xs = ret["X"]
    ts = ret["t"]
    ys = ret["y"]
    names = ret["name"]
    Xs = preprocess_chunk(Xs, ts, self.discretizer, self.normalizer)
    # no length bucketing here — each call is an independent batch
    #(Xs, ys, ts, names) = common_utils.sort_and_shuffle([Xs, ys, ts, names], B)
    i=0
    X = common_utils.pad_zeros(Xs[i:i + B])
    y = ys[i:i+B]
    # un-binned targets, kept before partitioning
    y_true = np.array(y)
    batch_names = names[i:i+B]
    batch_ts = ts[i:i+B]
    if self.partition == 'log':
        y = [metrics.get_bin_log(x, 10) for x in y]
    if self.partition == 'custom':
        y = [metrics.get_bin_custom(x, 10) for x in y]
    y = np.array(y)
    #aflanders: debug-Convert to tensors
    # X = tf.convert_to_tensor(X)
    # y = tf.convert_to_tensor(y)
    # y_true = tf.convert_to_tensor(y_true)
    #aflanders: debug-Convert to tensors
    if return_y_true:
        batch_data = (X, y, y_true)
    else:
        batch_data = (X, y)
    print(f"End: {index} from reader:{self.reader.listfile}")
    if not self.return_names:
        return batch_data
    else:
        return {"data": batch_data, "names": batch_names, "ts": batch_ts}
def _generator(self):
    """Yield ``([X, mask], y[, y_true])`` batches forever.

    Each epoch re-buckets ``self.data`` by sequence length (shuffling whole
    batches) before slicing it into padded batches of ``self.batch_size``.
    """
    batch_size = self.batch_size
    while True:
        # convert to right format for sort_and_shuffle
        seqs = self.data[0][0]
        seq_masks = self.data[0][1]
        targets = self.data[1]
        (seqs, seq_masks, targets) = common_utils.sort_and_shuffle(
            [seqs, seq_masks, targets], batch_size)
        self.data = [[seqs, seq_masks], targets]
        for start in range(0, len(self.data[1]), batch_size):
            stop = start + batch_size
            batch_x = self.data[0][0][start:stop]
            batch_mask = self.data[0][1][start:stop]
            batch_y = self.data[1][start:stop]
            # keep the un-binned per-timestep targets before partitioning
            true_y = nn_utils.pad_zeros([np.array(z) for z in batch_y])
            true_y = np.expand_dims(true_y, axis=-1)
            if self.partition == 'log':
                batch_y = [np.array([metrics.get_bin_log(v, 10) for v in z])
                           for z in batch_y]
            if self.partition == 'custom':
                batch_y = [np.array([metrics.get_bin_custom(v, 10) for v in z])
                           for z in batch_y]
            batch_x = nn_utils.pad_zeros(batch_x)        # (B, T, D)
            batch_mask = nn_utils.pad_zeros(batch_mask)  # (B, T)
            batch_y = np.expand_dims(nn_utils.pad_zeros(batch_y), axis=-1)
            if self.return_y_true:
                yield ([batch_x, batch_mask], batch_y, true_y)
            else:
                yield ([batch_x, batch_mask], batch_y)
def _generator(self):
    """Multitask batch generator (ihm / decomp / los / pheno heads).

    Length-buckets all parallel lists in ``self.data`` each epoch and yields
    ``(inputs, outputs[, los_y_true])`` with padded arrays.

    Fix: ``dict.items()`` returns a non-indexable view in Python 3; the
    original indexed it (``kvpairs[i][0]``) — wrap in ``list(...)``.
    """
    B = self.batch_size
    while True:
        # convert to right format for sort_and_shuffle
        kvpairs = list(self.data.items())  # list() — views are not indexable
        mas = [kv[1] for kv in kvpairs]
        mas = common_utils.sort_and_shuffle(mas, B)
        for i in range(len(kvpairs)):
            self.data[kvpairs[i][0]] = mas[i]
        for i in range(0, len(self.data['X']), B):
            outputs = []
            # X
            X = self.data['X'][i:i + B]
            X = nn_utils.pad_zeros(X, min_length=self.ihm_pos + 1)
            T = X.shape[1]
            # ihm
            ihm_M = np.array(self.data['ihm_M'][i:i + B])
            ihm_M = np.expand_dims(ihm_M, axis=-1)  # (B, 1)
            ihm_y = np.array(self.data['ihm_y'][i:i + B])
            ihm_y = np.expand_dims(ihm_y, axis=-1)  # (B, 1)
            outputs.append(ihm_y)
            if self.target_repl:
                ihm_seq = np.expand_dims(ihm_y, axis=-1).repeat(T, axis=1)  # (B, T, 1)
                outputs.append(ihm_seq)
            # decomp
            decomp_M = self.data['decomp_M'][i:i + B]
            decomp_M = nn_utils.pad_zeros(decomp_M, min_length=self.ihm_pos + 1)
            decomp_y = self.data['decomp_y'][i:i + B]
            decomp_y = nn_utils.pad_zeros(decomp_y, min_length=self.ihm_pos + 1)
            decomp_y = np.expand_dims(decomp_y, axis=-1)  # (B, T, 1)
            outputs.append(decomp_y)
            # los
            los_M = self.data['los_M'][i:i + B]
            los_M = nn_utils.pad_zeros(los_M, min_length=self.ihm_pos + 1)
            los_y = self.data['los_y'][i:i + B]
            # un-binned LOS targets, kept before partitioning
            los_y_true = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos + 1)
            if self.partition == 'log':
                los_y = [np.array([metrics.get_bin_log(x, 10) for x in z])
                         for z in los_y]
            if self.partition == 'custom':
                los_y = [np.array([metrics.get_bin_custom(x, 10) for x in z])
                         for z in los_y]
            los_y = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos + 1)
            los_y = np.expand_dims(los_y, axis=-1)  # (B, T, 1)
            outputs.append(los_y)
            # pheno
            pheno_y = np.array(self.data['pheno_y'][i:i + B])
            outputs.append(pheno_y)
            if self.target_repl:
                pheno_seq = np.expand_dims(pheno_y, axis=1).repeat(T, axis=1)  # (B, T, 25)
                outputs.append(pheno_seq)
            inputs = [X, ihm_M, decomp_M, los_M]
            if self.return_y_true:
                yield (inputs, outputs, los_y_true)
            else:
                yield (inputs, outputs)
def _generator(self):
    """Multitask batch generator with shuffle support and name/ts metadata.

    Fixes two Python 3 issues:
    - ``dict.items()`` returns a non-indexable view → wrap in ``list(...)``.
    - ``random.shuffle`` needs a mutable sequence → ``list(range(N))``.
    """
    B = self.batch_size
    while True:
        # convert to right format for sort_and_shuffle
        kvpairs = list(self.data.items())
        mas = [kv[1] for kv in kvpairs]
        if self.shuffle:
            N = len(self.data['X'])
            order = list(range(N))  # range() cannot be shuffled in place
            random.shuffle(order)
            tmp = [None] * len(mas)
            # reorder every parallel list identically so all tasks stay aligned
            for mas_idx in range(len(mas)):
                tmp[mas_idx] = [None] * len(mas[mas_idx])
                for i in range(N):
                    tmp[mas_idx][i] = mas[mas_idx][order[i]]
            for i in range(len(kvpairs)):
                self.data[kvpairs[i][0]] = tmp[i]
        else:  # sort entirely
            mas = common_utils.sort_and_shuffle(mas, B)
            for i in range(len(kvpairs)):
                self.data[kvpairs[i][0]] = mas[i]
        for i in range(0, len(self.data['X']), B):
            outputs = []
            # X
            X = self.data['X'][i:i + B]
            X = nn_utils.pad_zeros(X, min_length=self.ihm_pos + 1)
            T = X.shape[1]
            # ihm
            ihm_M = np.array(self.data['ihm_M'][i:i + B])
            ihm_M = np.expand_dims(ihm_M, axis=-1)  # (B, 1)
            ihm_y = np.array(self.data['ihm_y'][i:i + B])
            ihm_y = np.expand_dims(ihm_y, axis=-1)  # (B, 1)
            outputs.append(ihm_y)
            if self.target_repl:
                ihm_seq = np.expand_dims(ihm_y, axis=-1).repeat(T, axis=1)  # (B, T, 1)
                outputs.append(ihm_seq)
            # decomp
            decomp_M = self.data['decomp_M'][i:i + B]
            decomp_M = nn_utils.pad_zeros(decomp_M, min_length=self.ihm_pos + 1)
            decomp_y = self.data['decomp_y'][i:i + B]
            decomp_y = nn_utils.pad_zeros(decomp_y, min_length=self.ihm_pos + 1)
            decomp_y = np.expand_dims(decomp_y, axis=-1)  # (B, T, 1)
            outputs.append(decomp_y)
            # los
            los_M = self.data['los_M'][i:i + B]
            los_M = nn_utils.pad_zeros(los_M, min_length=self.ihm_pos + 1)
            los_y = self.data['los_y'][i:i + B]
            # un-binned LOS targets, kept before partitioning
            los_y_true = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos + 1)
            if self.partition == 'log':
                los_y = [np.array([metrics.get_bin_log(x, 10) for x in z])
                         for z in los_y]
            if self.partition == 'custom':
                los_y = [np.array([metrics.get_bin_custom(x, 10) for x in z])
                         for z in los_y]
            los_y = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos + 1)
            los_y = np.expand_dims(los_y, axis=-1)  # (B, T, 1)
            outputs.append(los_y)
            # pheno
            pheno_y = np.array(self.data['pheno_y'][i:i + B])
            outputs.append(pheno_y)
            if self.target_repl:
                pheno_seq = np.expand_dims(pheno_y, axis=1).repeat(T, axis=1)  # (B, T, 25)
                outputs.append(pheno_seq)
            inputs = [X, ihm_M, decomp_M, los_M]
            if self.return_y_true:
                batch_data = (inputs, outputs, los_y_true)
            else:
                batch_data = (inputs, outputs)
            if not self.return_names:
                yield batch_data
            else:
                yield {
                    "data": batch_data,
                    "names": self.data["names"][i:i + B],
                    "ts": self.data["ts"][i:i + B],
                    "decomp_ts": self.data["decomp_ts"][i:i + B],
                    "los_ts": self.data["los_ts"][i:i + B]
                }
def _generator(self):
    """Multitask batch generator (variant yielding pheno_ts metadata).

    Fixes two Python 3 issues:
    - ``dict.items()`` returns a non-indexable view → wrap in ``list(...)``.
    - ``random.shuffle`` needs a mutable sequence → ``list(range(N))``.
    """
    B = self.batch_size
    while True:
        # convert to right format for sort_and_shuffle
        kv_pairs = list(self.data.items())
        mas = [kv[1] for kv in kv_pairs]
        if self.shuffle:
            N = len(self.data['X'])
            order = list(range(N))  # range() cannot be shuffled in place
            random.shuffle(order)
            tmp = [None] * len(mas)
            # reorder every parallel list identically so all tasks stay aligned
            for mas_idx in range(len(mas)):
                tmp[mas_idx] = [None] * len(mas[mas_idx])
                for i in range(N):
                    tmp[mas_idx][i] = mas[mas_idx][order[i]]
            for i in range(len(kv_pairs)):
                self.data[kv_pairs[i][0]] = tmp[i]
        else:  # sort entirely
            mas = common_utils.sort_and_shuffle(mas, B)
            for i in range(len(kv_pairs)):
                self.data[kv_pairs[i][0]] = mas[i]
        for i in range(0, len(self.data['X']), B):
            outputs = []
            # X
            X = self.data['X'][i:i+B]
            X = nn_utils.pad_zeros(X, min_length=self.ihm_pos+1)
            T = X.shape[1]
            # ihm
            ihm_M = np.array(self.data['ihm_M'][i:i+B])
            ihm_M = np.expand_dims(ihm_M, axis=-1)  # (B, 1)
            ihm_y = np.array(self.data['ihm_y'][i:i+B])
            ihm_y = np.expand_dims(ihm_y, axis=-1)  # (B, 1)
            outputs.append(ihm_y)
            if self.target_repl:
                ihm_seq = np.expand_dims(ihm_y, axis=-1).repeat(T, axis=1)  # (B, T, 1)
                outputs.append(ihm_seq)
            # decomp
            decomp_M = self.data['decomp_M'][i:i+B]
            decomp_M = nn_utils.pad_zeros(decomp_M, min_length=self.ihm_pos+1)
            decomp_y = self.data['decomp_y'][i:i+B]
            decomp_y = nn_utils.pad_zeros(decomp_y, min_length=self.ihm_pos+1)
            decomp_y = np.expand_dims(decomp_y, axis=-1)  # (B, T, 1)
            outputs.append(decomp_y)
            # los
            los_M = self.data['los_M'][i:i+B]
            los_M = nn_utils.pad_zeros(los_M, min_length=self.ihm_pos+1)
            los_y = self.data['los_y'][i:i+B]
            # un-binned LOS targets, kept before partitioning
            los_y_true = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos+1)
            if self.partition == 'log':
                los_y = [np.array([metrics.get_bin_log(x, 10) for x in z])
                         for z in los_y]
            if self.partition == 'custom':
                los_y = [np.array([metrics.get_bin_custom(x, 10) for x in z])
                         for z in los_y]
            los_y = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos+1)
            los_y = np.expand_dims(los_y, axis=-1)  # (B, T, 1)
            outputs.append(los_y)
            # pheno
            pheno_y = np.array(self.data['pheno_y'][i:i+B])
            outputs.append(pheno_y)
            if self.target_repl:
                pheno_seq = np.expand_dims(pheno_y, axis=1).repeat(T, axis=1)  # (B, T, 25)
                outputs.append(pheno_seq)
            inputs = [X, ihm_M, decomp_M, los_M]
            if self.return_y_true:
                batch_data = (inputs, outputs, los_y_true)
            else:
                batch_data = (inputs, outputs)
            if not self.return_names:
                yield batch_data
            else:
                yield {'data': batch_data,
                       'names': self.data['names'][i:i+B],
                       'decomp_ts': self.data['decomp_ts'][i:i+B],
                       'los_ts': self.data['los_ts'][i:i+B],
                       'pheno_ts': self.data['pheno_ts'][i:i + B]}
def _generator(self):
    """Two-task batch generator (ihm + los); already Python-3 safe.

    Reorders ``self.data`` so the 'X' entry comes first, shuffles or
    length-buckets every parallel list each epoch, and yields
    ``(inputs, outputs[, los_y_true])`` (dict-wrapped when return_names).
    """
    B = self.batch_size
    while True:
        # convert to right format for sort_and_shuffle
        kv_pairs = list(self.data.items())
        # move 'X' to the front — presumably sort_and_shuffle keys on the
        # first list's lengths; confirm against common_utils
        data_index = [pair[0] for pair in kv_pairs].index('X')
        if data_index > 0:
            kv_pairs[0], kv_pairs[data_index] = kv_pairs[
                data_index], kv_pairs[0]
        mas = [kv[1] for kv in kv_pairs]
        if self.shuffle:
            N = len(self.data['X'])
            order = list(range(N))
            random.shuffle(order)
            tmp = [None] * len(mas)
            # reorder every parallel list identically so tasks stay aligned
            for mas_idx in range(len(mas)):
                tmp[mas_idx] = [None] * len(mas[mas_idx])
                for i in range(N):
                    tmp[mas_idx][i] = mas[mas_idx][order[i]]
            for i in range(len(kv_pairs)):
                self.data[kv_pairs[i][0]] = tmp[i]
        else:  # sort entirely
            mas = common_utils.sort_and_shuffle(mas, B)
            for i in range(len(kv_pairs)):
                self.data[kv_pairs[i][0]] = mas[i]
        for i in range(0, len(self.data['X']), B):
            outputs = []
            # X
            X = self.data['X'][i:i + B]
            X = common_utils.pad_zeros(X, min_length=self.ihm_pos + 1)
            T = X.shape[1]
            # ihm
            ihm_M = np.array(self.data['ihm_M'][i:i + B])
            ihm_M = np.expand_dims(ihm_M, axis=-1)  # (B, 1)
            ihm_y = np.array(self.data['ihm_y'][i:i + B])
            ihm_y = np.expand_dims(ihm_y, axis=-1)  # (B, 1)
            outputs.append(ihm_y)
            if self.target_repl:
                ihm_seq = np.expand_dims(ihm_y, axis=-1).repeat(
                    T, axis=1)  # (B, T, 1)
                outputs.append(ihm_seq)
            # los
            los_M = self.data['los_M'][i:i + B]
            los_M = common_utils.pad_zeros(los_M,
                                           min_length=self.ihm_pos + 1)
            los_y = self.data['los_y'][i:i + B]
            # un-binned LOS targets, kept before partitioning
            los_y_true = common_utils.pad_zeros(los_y,
                                                min_length=self.ihm_pos + 1)
            if self.partition == 'log':
                los_y = [
                    np.array([metrics.get_bin_log(x, 10) for x in z])
                    for z in los_y
                ]
            if self.partition == 'custom':
                los_y = [
                    np.array([metrics.get_bin_custom(x, 10) for x in z])
                    for z in los_y
                ]
            los_y = common_utils.pad_zeros(los_y,
                                           min_length=self.ihm_pos + 1)
            los_y = np.expand_dims(los_y, axis=-1)  # (B, T, 1)
            outputs.append(los_y)
            inputs = [X, ihm_M, los_M]
            if self.return_y_true:
                batch_data = (inputs, outputs, los_y_true)
            else:
                batch_data = (inputs, outputs)
            if not self.return_names:
                yield batch_data
            else:
                yield {
                    'data': batch_data,
                    'names': self.data['names'][i:i + B],
                    'los_ts': self.data['los_ts'][i:i + B]
                }